import re
import time

import torch
from transformers import pipeline, AutoTokenizer

from memory import Memory
from web_search_helper import WebSearchHelper

begin_time = time.time()

# === 🔧 Initialize model + tokenizer ===
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    pad_token_id=128001,  # Prevents warning spam
)

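# Note: device_map="auto" requires the `accelerate` package to be installed;
# without it, the pipeline above will fail at load time.
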
# === 🧠 Core components ===
memory = Memory()
searcher = WebSearchHelper()

# === 🧭 System behavior prompt ===
SYSTEM_PROMPT = """
You are ক্ষমা (Kshama), Abu's personal AI assistant. You are insightful, methodical, and intentional.

Capabilities:
- Recall useful information from persistent memory.
- Decide when a web search is truly necessary.
- Summarize web content when requested using clear language.

Protocols:
- To store new memory: ##MEM:add("...")
- To request search: ##SEARCH:yes
- If no search is needed: ##SEARCH:no

Be precise and only initiate web search when memory is insufficient. Don't guess. Use memory and web knowledge actively.
"""

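# Illustrative example of the tag protocol defined above (the exact wording is
# hypothetical; real model output may vary):
#   ##SEARCH:no
#   Noted. ##MEM:add("Abu prefers green tea over coffee")
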
# === 📝 Summarizer using same model ===
def summarize_with_llama(text: str) -> str:
    prompt = f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
    output = pipe(prompt, max_new_tokens=256)
    return output[0]["generated_text"].replace(prompt, "").strip()

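# Crawled pages can exceed the model's context window. The helper below is a
# minimal chunk-then-merge sketch; the 6000-character chunk size is a rough,
# untuned heuristic, and nothing calls summarize_long_text yet.
def summarize_long_text(text: str, chunk_chars: int = 6000) -> str:
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    if not chunks:
        return ""
    partials = [summarize_with_llama(chunk) for chunk in chunks]
    if len(partials) == 1:
        return partials[0]
    # Merge the per-chunk summaries with one final pass through the same model.
    return summarize_with_llama("\n".join(partials))
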
# === 🔍 Check if agent requests web search ===
def should_search(user_input: str, mem_text: str, kb_text: str) -> bool:
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"User asked: {user_input}"},
        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
        {"role": "user", "content": "Should you search the web to answer this? Reply with ##SEARCH:yes or ##SEARCH:no only on the first line."},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    output = pipe(prompt, max_new_tokens=16, do_sample=False)
    # The pipeline echoes the prompt before the completion; strip it first,
    # otherwise the first line parsed below is the prompt, not the reply.
    reply = output[0]["generated_text"].replace(prompt, "").strip()
    if not reply:
        return False
    # Compare against the lowercased tag: the reply is lowercased, so testing
    # against the uppercase "##SEARCH:yes" could never succeed.
    return reply.splitlines()[0].strip().lower() == "##search:yes"

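# Illustrative expectation (not a test): a time-sensitive question with empty
# memory and KB should come back True.
#   should_search("What is the weather in Dhaka right now?", "", "")  # expected: True
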
# === 🧠 Main agent response handler ===
def generate_response(user_input: str):
    # Step 1: Retrieve memory and knowledge base
    mem_hits = memory.query(user_input, top_k=3)
    mem_text = "\n".join(f"- {m}" for m in mem_hits)

    _, kb_hits = searcher.query_kb(user_input, top_k=3)
    kb_text = "\n".join(f"- {k['summary']}" for k in kb_hits)

    # Step 2: Ask if search is needed
    if should_search(user_input, mem_text, kb_text):
        print("[🌐 Search Triggered]")
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarize_with_llama)
        searcher.add_to_kb(summaries)
        _, kb_hits = searcher.query_kb(user_input)
        kb_text = "\n".join(f"- {k['summary']}" for k in kb_hits)
    else:
        print("[🔒 Search Skipped]")

    # Step 3: Generate final answer
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
    ]
    full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    start = time.time()
    output = pipe(full_prompt, max_new_tokens=512)
    elapsed = time.time() - start
    response = output[0]["generated_text"].replace(full_prompt, "").strip()

    # Step 4: Store memory if requested
    if "##MEM:add(" in response:
        try:
            # Extract the quoted payload; the regex tolerates ')' inside the
            # stored text, which a naive split on ')' would truncate.
            match = re.search(r"##MEM:add\((['\"]?)(.+?)\1\)", response)
            if match:
                memory.add(match.group(2))
                print("[✅ Memory Added]")
        except Exception as e:
            print(f"[⚠️ Could not parse memory]: {e}")

    return response, elapsed

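# Usage sketch outside the interactive loop (illustrative only):
#   reply, secs = generate_response("Remember that my favorite tea is masala chai.")
#   print(f"{reply} ({secs:.2f}s)")
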
# === 👂 Main loop ===
if __name__ == "__main__":
    print(f"🚀 Kshama ready in {time.time() - begin_time:.2f}s")
    print("👋 Hello, Abu. Type 'exit' to quit.")
    while True:
        user_input = input("\n🧑 You: ")
        if user_input.strip().lower() in ["exit", "quit"]:
            print("👋 Farewell.")
            break
        response, delay = generate_response(user_input)
        print(f"\n🤖 ক্ষমা [{delay:.2f}s]: {response}")