# kshama/agent.py

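"""Kshama: a memory-augmented personal assistant agent.

Wires a vector-store memory and a DuckDuckGo search helper around
Llama 3.2 1B Instruct, and exposes a small REPL for testing.
"""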
import time

begin_time = time.time()  # record start before the heavy imports so the REPL can report startup latency
import torch
from transformers import pipeline, AutoTokenizer
from memory import Memory
from web_search_helper import WebSearchHelper
from llm_wrapper import LlmWrapper
# Initialize components
memory = Memory()
searcher = WebSearchHelper()
summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B")  # optional, used to summarize crawled search results

# Load the main LLM (Llama 3.2 1B Instruct) and its tokenizer
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
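
# Note: bfloat16 assumes supporting hardware (e.g., Ampere+ GPUs or a recent CPU);
# torch.float16 or torch.float32 are drop-in fallbacks otherwise. Llama 3.2 is a
# gated repo on the Hugging Face Hub, so an authenticated token must be configured.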
# Define system prompt and Kshama's capabilities
SYSTEM_PROMPT = """
You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
You maintain and query a persistent memory of past interactions and facts via a vector store.
You can:
1. Recall relevant knowledge from memory using semantic similarity.
2. Add new insights to memory when useful.
3. Perform live web searches and summarize results if memory is insufficient.
Structure your outputs clearly:
- Use ##MEM:add(...) to store thoughts to memory.
- Use ##MEM:recall(...) to request a lookup (already handled externally).
- Use ##SEARCH:trigger(...) when memory lacks the answer.
Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
"""
def generate_response(user_input: str):
    # Step 1: Recall relevant memory from the vector store
    recalled = memory.query(user_input, top_k=3)
    memory_context = "\n".join(f"- {item}" for item in recalled)
    # Step 2: Decide whether memory needs supplementing with a live web search
    should_search = searcher.should_trigger_search(text=user_input)
    if should_search:
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize)
        searcher.add_to_kb(summaries)
    # Query the knowledge base either way; it now includes any fresh summaries
    _, hits = searcher.query_kb(user_input)
    kb_hits = "\n".join(f"- {h['summary']}" for h in hits)
    # Step 3: Compose the structured message context
    context_block = f"""Known facts from memory:
{memory_context or '[None]'}
External knowledge from web:
{kb_hits or '[None]'}
"""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"},
    ]
    # Convert to the model's chat format via the tokenizer's chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # appends the assistant header so generation starts there
    )
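    # With tokenize=False this yields a single formatted string (Llama 3.2 chat
    # tags included), which the text-generation pipeline accepts directly.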
    # Step 4: Call the model; return_full_text=False keeps the prompt (and its
    # literal ##MEM examples in the system prompt) out of the parsed response
    output = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    # Step 5: Process model output (persist anything marked for memory)
    response = output[0]["generated_text"].strip()
    if "##MEM:add(" in response:
        try:
            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
            memory.add(content)
            print("[✅ Memory Added]")
        except Exception:  # a bare except here would also swallow KeyboardInterrupt
            print("[⚠️ Couldn't parse memory add]")
    return response

# 💬 REPL for testing
if __name__ == "__main__":
    print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
    print("👋 Welcome to Kshama. Type 'exit' to leave.")
    while True:
        user_input = input("\n🧑 You: ")
        if user_input.strip().lower() in ["exit", "quit"]:
            break
        response = generate_response(user_input)
        print(f"\n🤖 ক্ষমা: {response}")