kshama/agent.py
sufian d845c29e81 Refactor agent initialization and response workflow
Streamlined agent initialization, separating components for clarity. Improved memory, web search, and response generation logic for better structure and efficiency. Added elapsed time display and refined interactive REPL messaging.
2025-06-29 20:52:43 +06:00

113 lines
4.3 KiB
Python

import re
import time

import torch
from transformers import pipeline, AutoTokenizer

from memory import Memory
from web_search_helper import WebSearchHelper
# Initialize clock: captured while the module is loading so the interactive
# loop at the bottom of the file can report how long startup took.
begin_time = time.time()
# 🔧 Load model and tokenizer (Llama 3.2 1B Instruct).
# NOTE: these statements run at module import, so importing this file loads
# the model as a side effect.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
# The tokenizer is loaded on its own because the functions below use its chat
# template (apply_chat_template) to render message lists into prompt strings.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Shared text-generation pipeline; the functions below call it for
# summarization, the search decision, and the final answer.
pipe = pipeline(
"text-generation",
model=model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
pad_token_id=128001 # Suppress warnings (NOTE(review): 128001 is presumably one of the model's end-of-text special token ids; confirm against the tokenizer)
)
# 🧩 Agent components: long-term memory store and web search / knowledge-base
# helper, both project-local classes used by generate_response.
memory = Memory()
searcher = WebSearchHelper()
# 🧭 System prompt (Kshama's persona + capabilities)
# Sent as the "system" message in both should_search and generate_response.
# The tags it describes are acted on by code in this file:
#   - "##SEARCH:yes" is looked for (case-insensitively) in should_search;
#   - "##MEM:add(" is looked for in the final answer in generate_response.
# The literal therefore is part of the program's behavior; keep tag spellings
# in sync with that parsing code when editing the prompt.
SYSTEM_PROMPT = """
You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
You can:
1. Recall relevant information from long-term memory.
2. Decide whether to perform a web search if the memory lacks necessary detail.
3. Summarize text clearly when requested.
You use these tags:
- ##MEM:add("...") to store information in memory.
- ##SEARCH:yes if a web search is needed.
- ##SEARCH:no if memory is sufficient.
Be concise but friendly. Don't suggest a search unless it is clearly needed.
"""
# 📝 Wrapper: summarize text with Llama
def summarize_with_llama(text):
    """Return a model-written summary of ``text``.

    The text is stripped of surrounding whitespace, embedded in a fixed
    summarization prompt, and passed to the shared ``pipe`` with sampling
    enabled (temperature 0.7, up to 256 new tokens).

    Args:
        text: Raw text to summarize; must support ``.strip()``.

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped.
    """
    cleaned = text.strip()
    prompt = f"Summarize the following webpage text:\n\n{cleaned}\n\nSummary:"
    generations = pipe(
        prompt,
        do_sample=True,
        temperature=0.7,
        max_new_tokens=256,
    )
    full_text = generations[0]["generated_text"]
    # The generated text carries the prompt as well; drop it so only the
    # summary itself is returned.
    summary = full_text.replace(prompt, "")
    return summary.strip()
# 🎯 Ask model if it needs web search
def should_search(user_input, memory_hits, kb_hits):
    """Ask the model whether a web search is needed to answer ``user_input``.

    Args:
        user_input: The user's question as typed.
        memory_hits: Already-formatted text of recalled memory entries. A
            falsy value (e.g. an empty string) is shown to the model as
            ``[None]``.
        kb_hits: Already-formatted text of stored web-knowledge summaries,
            with the same falsy handling as ``memory_hits``.

    Returns:
        True only when the model's own reply contains ``##SEARCH:yes``
        (matched case-insensitively); False otherwise.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"User asked: {user_input}"},
        {"role": "user", "content": f"Known memory:\n{memory_hits or '[None]'}"},
        {"role": "user", "content": f"Web knowledge:\n{kb_hits or '[None]'}"},
        {
            "role": "user",
            "content": "Do you need more information to answer this? "
                       "Reply with ##SEARCH:yes or ##SEARCH:no.",
        },
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The prompt itself contains the literal "##SEARCH:yes" (in the system
    # prompt and in the final instruction). By default the pipeline returns
    # prompt + continuation, so scanning that text would match on every call.
    # Ask for the continuation only, so the check sees just the model's reply.
    output = pipe(
        prompt,
        max_new_tokens=32,
        do_sample=False,
        return_full_text=False,
    )
    reply = output[0]["generated_text"].strip().lower()
    return "##search:yes" in reply
# 🧠 Core reasoning + memory loop

# Matches a ##MEM:add(...) directive. Quoted payloads are tried first so that
# a ")" inside the quotes does not end the match early; the unquoted fallback
# stops at the first ")".
_MEM_ADD_RE = re.compile(
    r"##MEM:add\(\s*"
    r'(?:"(?P<dq>[^"\n]*)"'
    r"|'(?P<sq>[^'\n]*)'"
    r"|(?P<raw>[^)]*))"
    r"\s*\)"
)


def _bullet_list(items):
    """Render each item as a ``- item`` line, joined with newlines."""
    return "\n".join(f"- {item}" for item in items)


def _extract_memory_note(response):
    """Return the payload of the first ##MEM:add(...) directive, or None.

    None is returned when no complete directive (with its closing
    parenthesis) is present, or when the payload is empty or whitespace.
    """
    match = _MEM_ADD_RE.search(response)
    if match is None:
        return None
    quoted = match.group("dq")
    if quoted is None:
        quoted = match.group("sq")
    if quoted is not None:
        note = quoted
    else:
        # Unquoted or unevenly quoted payload: trim stray quote characters.
        note = match.group("raw").strip().strip("\"'")
    return note if note.strip() else None


def generate_response(user_input: str):
    """Answer ``user_input`` using memory, stored web knowledge and the model.

    Steps:
        1. Query long-term memory and the web knowledge base.
        2. Ask the model (via ``should_search``) whether a web search is
           needed; if so, search, summarize the results, add them to the
           knowledge base and query it again.
        3. Build the final chat prompt from the question and both contexts.
        4. Generate the answer, timing only this generation call.
        5. If the answer contains a ``##MEM:add(...)`` directive, store its
           payload in memory.

    Args:
        user_input: The user's message.

    Returns:
        A ``(response, elapsed)`` tuple: the generated answer text (any
        directive tags are left in place) and the seconds spent in the final
        generation call.
    """
    # Step 1: recall memory + web KB
    memory_hits = memory.query(user_input, top_k=3)
    mem_text = _bullet_list(memory_hits)
    _, kb = searcher.query_kb(user_input, top_k=3)
    kb_text = _bullet_list(entry["summary"] for entry in kb)

    # Step 2: let Kshama decide if she wants to search
    if should_search(user_input, mem_text, kb_text):
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(
            urls, llm_function=summarize_with_llama
        )
        searcher.add_to_kb(summaries)
        # NOTE(review): unlike the first lookup, no top_k is passed here, so
        # the helper's default applies; confirm that this is intended.
        _, kb = searcher.query_kb(user_input)
        kb_text = _bullet_list(entry["summary"] for entry in kb)

    # Step 3: Compose final answer prompt
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{user_input}"},
        {"role": "user", "content": f"Relevant memory:\n{mem_text or '[None]'}"},
        {"role": "user", "content": f"Web knowledge:\n{kb_text or '[None]'}"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Step 4: generate final response
    start = time.time()
    output = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
    elapsed = time.time() - start
    # The generated text includes the prompt; remove it to keep the answer only.
    response = output[0]["generated_text"].replace(prompt, "").strip()

    # Step 5: parse memory intent
    if "##MEM:add(" in response:
        note = _extract_memory_note(response)
        if note is None:
            print("[⚠️ Could not parse memory directive]")
        else:
            try:
                memory.add(note)
            except Exception as exc:
                # Storing a memory is best-effort: report the failure but
                # still return the answer. Exception (not a bare except)
                # lets Ctrl-C and SystemExit propagate.
                print(f"[⚠️ Could not store memory: {exc}]")
            else:
                print("[✅ Memory Added]")
    return response, elapsed
# 🧪 Interactive loop
def main():
    """Run the interactive REPL until the user exits.

    Prints the startup time and a welcome line, then repeatedly reads a line,
    generates a reply with ``generate_response`` and prints it together with
    the generation time. The loop ends on ``exit`` / ``quit``
    (case-insensitive) or on end-of-input / Ctrl-C at the prompt.
    """
    print(f"🚀 Booted in {time.time() - begin_time:.2f}s")
    print("👋 Welcome to Kshama. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("\n🧑 You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave quietly instead of
            # terminating with a traceback.
            print()
            break
        command = user_input.strip()
        if not command:
            # Nothing was typed; avoid running a full generation for it.
            continue
        if command.lower() in ("exit", "quit"):
            break
        response, t = generate_response(user_input)
        print(f"\n🤖 ক্ষমা [{t:.2f}s]: {response}")


if __name__ == "__main__":
    main()