Refactor agent.py for clarity and improved functionality

Reorganized code structure with clearer section headers and comments. Refined prompts, variable naming, and logic flow for better readability and accuracy. Enhanced memory handling and search decision mechanisms while simplifying initialization and user interactions.
This commit is contained in:
sufian 2025-06-29 21:07:45 +06:00
parent d845c29e81
commit 54a84714db
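
The "search decision mechanism" mentioned above now hinges on a strict first-line tag: the model must begin its reply with ##SEARCH:yes or ##SEARCH:no. A minimal, model-free sketch of that contract (the helper name and test strings are illustrative, not part of the commit):

# Illustrative only (not in the commit): the first-line ##SEARCH tag check,
# factored out so the contract can be tested without loading the model.
def parse_search_decision(reply: str) -> bool:
    """True only when the model's first reply line is exactly '##SEARCH:yes'."""
    lines = reply.strip().lower().splitlines()
    return bool(lines) and lines[0].strip() == "##search:yes"

assert parse_search_decision("##SEARCH:yes\nMemory lacks the needed detail.")
assert not parse_search_decision("Maybe ##SEARCH:yes later")  # tag must lead the reply
assert not parse_search_decision("##SEARCH:no")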

agent.py

@@ -4,109 +4,114 @@ from transformers import pipeline, AutoTokenizer
 from memory import Memory
 from web_search_helper import WebSearchHelper
 # Initialize clock
 begin_time = time.time()
-# 🔧 Load model and tokenizer (Llama3.2:1B)
+# === 🔧 Initialize model + tokenizer ===
 model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
     "text-generation",
     model=model_id,
     tokenizer=tokenizer,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    pad_token_id=128001  # Suppress warnings
+    pad_token_id=128001  # Prevents warning spam
 )
-# 🧩 Agent components
+# === 🧠 Core components ===
 memory = Memory()
 searcher = WebSearchHelper()
-# 🧭 System prompt (Kshama's persona + capabilities)
+# === 🧭 System behavior prompt ===
 SYSTEM_PROMPT = """
-You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
-You can:
-1. Recall relevant information from long-term memory.
-2. Decide whether to perform a web search if the memory lacks necessary detail.
-3. Summarize text clearly when requested.
+You are ক্ষমা (Kshama), Abu's personal AI assistant. You are insightful, methodical, and intentional.
+Capabilities:
+- Recall useful information from persistent memory.
+- Decide when a web search is truly necessary.
+- Summarize web content when requested using clear language.
-You use these tags:
-- ##MEM:add("...") to store information in memory.
-- ##SEARCH:yes if a web search is needed.
-- ##SEARCH:no if memory is sufficient.
+Protocols:
+- To store new memory: ##MEM:add("...")
+- To request search: ##SEARCH:yes
+- If no search is needed: ##SEARCH:no
-Be concise but friendly. Don't suggest a search unless it is clearly needed.
+Be precise and only initiate a web search when memory is insufficient. Don't guess. Use memory and web knowledge actively.
 """
-# 📝 Wrapper: summarize text with Llama
-def summarize_with_llama(text):
-    prompt = f"Summarize the following webpage text:\n\n{text.strip()}\n\nSummary:"
-    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+# === 📝 Summarizer using same model ===
+def summarize_with_llama(text: str) -> str:
+    prompt = f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
+    output = pipe(prompt, max_new_tokens=256)
     return output[0]["generated_text"].replace(prompt, "").strip()
-# 🎯 Ask model if it needs web search
-def should_search(user_input, memory_hits, kb_hits):
+# === 🔍 Check if agent requests web search ===
+def should_search(user_input: str, mem_text: str, kb_text: str) -> bool:
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": f"User asked: {user_input}"},
-        {"role": "user", "content": f"Known memory:\n{memory_hits or '[None]'}"},
-        {"role": "user", "content": f"Web knowledge:\n{kb_hits or '[None]'}"},
-        {"role": "user", "content": "Do you need more information to answer this? Reply with ##SEARCH:yes or ##SEARCH:no."},
+        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
+        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
+        {"role": "user", "content": "Should you search the web to answer this? Reply with ##SEARCH:yes or ##SEARCH:no only on the first line."}
     ]
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    output = pipe(prompt, max_new_tokens=32, do_sample=False)
-    reply = output[0]["generated_text"].strip().lower()
-    return "##search:yes" in reply
+    output = pipe(prompt, max_new_tokens=16, do_sample=False)
+    # Strip the echoed prompt so only the model's own reply is checked, and
+    # match the tag in lowercase since the reply has been lowercased
+    reply = output[0]["generated_text"].replace(prompt, "").strip().lower()
+    return bool(reply) and reply.splitlines()[0].strip() == "##search:yes"
-# 🧠 Core reasoning + memory loop
+# === 🧠 Main agent response handler ===
 def generate_response(user_input: str):
-    # Step 1: recall memory + web KB
-    memory_hits = memory.query(user_input, top_k=3)
-    mem_text = "\n".join([f"- {x}" for x in memory_hits])
+    # Step 1: Retrieve memory and knowledgebase
+    mem_hits = memory.query(user_input, top_k=3)
+    mem_text = "\n".join([f"- {m}" for m in mem_hits])
-    _, kb = searcher.query_kb(user_input, top_k=3)
-    kb_text = "\n".join([f"- {x['summary']}" for x in kb])
+    _, kb_hits = searcher.query_kb(user_input, top_k=3)
+    kb_text = "\n".join([f"- {k['summary']}" for k in kb_hits])
-    # Step 2: let Kshama decide if she wants to search
+    # Step 2: Ask if search is needed
     if should_search(user_input, mem_text, kb_text):
         print("[🌐 Search Triggered]")
         urls = searcher.search_duckduckgo(user_input)
         summaries = searcher.crawl_and_summarize(urls, llm_function=summarize_with_llama)
         searcher.add_to_kb(summaries)
-        _, kb = searcher.query_kb(user_input)
-        kb_text = "\n".join([f"- {x['summary']}" for x in kb])
+        _, kb_hits = searcher.query_kb(user_input)
+        kb_text = "\n".join([f"- {k['summary']}" for k in kb_hits])
     else:
         print("[🔒 Search Skipped]")
-    # Step 3: Compose final answer prompt
+    # Step 3: Generate final answer
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": f"{user_input}"},
-        {"role": "user", "content": f"Relevant memory:\n{mem_text or '[None]'}"},
-        {"role": "user", "content": f"Web knowledge:\n{kb_text or '[None]'}"}
+        {"role": "user", "content": user_input},
+        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
+        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"}
     ]
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Step 4: generate final response
     start = time.time()
-    output = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
+    output = pipe(full_prompt, max_new_tokens=512)
     elapsed = time.time() - start
-    response = output[0]["generated_text"].replace(prompt, "").strip()
+    response = output[0]["generated_text"].replace(full_prompt, "").strip()
-    # Step 5: parse memory intent
+    # Step 4: Store memory if requested
     if "##MEM:add(" in response:
         try:
-            thought = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
-            memory.add(thought)
+            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
+            memory.add(content)
             print("[✅ Memory Added]")
-        except:
-            print("[⚠️ Could not parse memory directive]")
+        except Exception as e:
+            print(f"[⚠️ Could not parse memory]: {e}")
     return response, elapsed
-# 🧪 Interactive loop
+# === 👂 Main loop ===
 if __name__ == "__main__":
-    print(f"🚀 Booted in {time.time() - begin_time:.2f}s")
-    print("👋 Welcome to Kshama. Type 'exit' to quit.")
+    print(f"🚀 Kshama ready in {time.time() - begin_time:.2f}s")
+    print("👋 Hello, Abu. Type 'exit' to quit.")
     while True:
         user_input = input("\n🧑 You: ")
-        if user_input.strip().lower() in ["exit", "quit"]: break
-        response, t = generate_response(user_input)
-        print(f"\n🤖 ক্ষমা [{t:.2f}s]: {response}")
+        if user_input.strip().lower() in ["exit", "quit"]:
+            print("👋 Farewell.")
+            break
+        response, delay = generate_response(user_input)
+        print(f"\n🤖 ক্ষমা [{delay:.2f}s]: {response}")