Refactor agent.py for clarity and improved functionality
Reorganized the code structure with clearer section headers and comments. Refined prompts, variable naming, and logic flow for better readability and accuracy. Improved memory handling and the search-decision mechanism while simplifying initialization and the interactive loop.
parent d845c29e81
commit 54a84714db

agent.py (113 changed lines)
@@ -4,109 +4,114 @@ from transformers import pipeline, AutoTokenizer
 from memory import Memory
 from web_search_helper import WebSearchHelper
 
-# Initialize clock
 begin_time = time.time()
 
-# 🔧 Load model and tokenizer (Llama3.2:1B)
+# === 🔧 Initialize model + tokenizer ===
 model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
     "text-generation",
     model=model_id,
+    tokenizer=tokenizer,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    pad_token_id=128001 # Suppress warnings
+    pad_token_id=128001 # Prevents warning spam
 )
 
-# 🧩 Agent components
+# === 🧠 Core components ===
 memory = Memory()
 searcher = WebSearchHelper()
 
-# 🧭 System prompt (Kshama's persona + capabilities)
+# === 🧭 System behavior prompt ===
 SYSTEM_PROMPT = """
-You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
-You can:
-1. Recall relevant information from long-term memory.
-2. Decide whether to perform a web search if the memory lacks necessary detail.
-3. Summarize text clearly when requested.
+You are ক্ষমা (Kshama), Abu's personal AI assistant. You are insightful, methodical, and intentional.
+Capabilities:
+- Recall useful information from persistent memory.
+- Decide when a web search is truly necessary.
+- Summarize web content when requested using clear language.
 
-You use these tags:
-- ##MEM:add("...") to store information in memory.
-- ##SEARCH:yes if a web search is needed.
-- ##SEARCH:no if memory is sufficient.
+Protocols:
+- To store new memory: ##MEM:add("...")
+- To request search: ##SEARCH:yes
+- If no search is needed: ##SEARCH:no
 
-Be concise but friendly. Don't suggest a search unless it is clearly needed.
+Be precise and only initiate web search when memory is insufficient. Don't guess. Use memory and web knowledge actively.
 """
 
-# 📝 Wrapper: summarize text with Llama
-def summarize_with_llama(text):
-    prompt = f"Summarize the following webpage text:\n\n{text.strip()}\n\nSummary:"
-    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+# === 📝 Summarizer using same model ===
+def summarize_with_llama(text: str) -> str:
+    prompt = f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
+    output = pipe(prompt, max_new_tokens=256)
     return output[0]["generated_text"].replace(prompt, "").strip()
 
-# 🎯 Ask model if it needs web search
-def should_search(user_input, memory_hits, kb_hits):
+# === 🔍 Check if agent requests web search ===
+def should_search(user_input: str, mem_text: str, kb_text: str) -> bool:
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": f"User asked: {user_input}"},
-        {"role": "user", "content": f"Known memory:\n{memory_hits or '[None]'}"},
-        {"role": "user", "content": f"Web knowledge:\n{kb_hits or '[None]'}"},
-        {"role": "user", "content": "Do you need more information to answer this? Reply with ##SEARCH:yes or ##SEARCH:no."},
+        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
+        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
+        {"role": "user", "content": "Should you search the web to answer this? Reply with ##SEARCH:yes or ##SEARCH:no only on the first line."}
     ]
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    output = pipe(prompt, max_new_tokens=32, do_sample=False)
+    output = pipe(prompt, max_new_tokens=16, do_sample=False)
     reply = output[0]["generated_text"].strip().lower()
-    return "##search:yes" in reply
+    print(output)
+    return reply.splitlines()[0].strip() == "##SEARCH:yes"
 
-# 🧠 Core reasoning + memory loop
+# === 🧠 Main agent response handler ===
 def generate_response(user_input: str):
-    # Step 1: recall memory + web KB
-    memory_hits = memory.query(user_input, top_k=3)
-    mem_text = "\n".join([f"- {x}" for x in memory_hits])
+    # Step 1: Retrieve memory and knowledgebase
+    mem_hits = memory.query(user_input, top_k=3)
+    mem_text = "\n".join([f"- {m}" for m in mem_hits])
 
-    _, kb = searcher.query_kb(user_input, top_k=3)
-    kb_text = "\n".join([f"- {x['summary']}" for x in kb])
+    _, kb_hits = searcher.query_kb(user_input, top_k=3)
+    kb_text = "\n".join([f"- {k['summary']}" for k in kb_hits])
 
-    # Step 2: let Kshama decide if she wants to search
+    # Step 2: Ask if search is needed
     if should_search(user_input, mem_text, kb_text):
+        print("[🌐 Search Triggered]")
         urls = searcher.search_duckduckgo(user_input)
         summaries = searcher.crawl_and_summarize(urls, llm_function=summarize_with_llama)
         searcher.add_to_kb(summaries)
-        _, kb = searcher.query_kb(user_input)
-        kb_text = "\n".join([f"- {x['summary']}" for x in kb])
+        _, kb_hits = searcher.query_kb(user_input)
+        kb_text = "\n".join([f"- {k['summary']}" for k in kb_hits])
+    else:
+        print("[🔒 Search Skipped]")
 
-    # Step 3: Compose final answer prompt
+    # Step 3: Generate final answer
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": f"{user_input}"},
-        {"role": "user", "content": f"Relevant memory:\n{mem_text or '[None]'}"},
-        {"role": "user", "content": f"Web knowledge:\n{kb_text or '[None]'}"}
+        {"role": "user", "content": user_input},
+        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
+        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"}
     ]
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
-    # Step 4: generate final response
     start = time.time()
-    output = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
+    output = pipe(full_prompt, max_new_tokens=512)
     elapsed = time.time() - start
-    response = output[0]["generated_text"].replace(prompt, "").strip()
+    response = output[0]["generated_text"].replace(full_prompt, "").strip()
 
-    # Step 5: parse memory intent
+    # Step 4: Store memory if requested
     if "##MEM:add(" in response:
         try:
-            thought = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
-            memory.add(thought)
+            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
+            memory.add(content)
             print("[✅ Memory Added]")
-        except:
-            print("[⚠️ Could not parse memory directive]")
+        except Exception as e:
+            print(f"[⚠️ Could not parse memory]: {e}")
 
     return response, elapsed
 
-# 🧪 Interactive loop
+# === 👂 Main loop ===
 if __name__ == "__main__":
-    print(f"🚀 Booted in {time.time() - begin_time:.2f}s")
-    print("👋 Welcome to Kshama. Type 'exit' to quit.")
+    print(f"🚀 Kshama ready in {time.time() - begin_time:.2f}s")
+    print("👋 Hello, Abu. Type 'exit' to quit.")
     while True:
         user_input = input("\n🧑 You: ")
-        if user_input.strip().lower() in ["exit", "quit"]: break
-        response, t = generate_response(user_input)
-        print(f"\n🤖 ক্ষমা [{t:.2f}s]: {response}")
+        if user_input.strip().lower() in ["exit", "quit"]:
+            print("👋 Farewell.")
+            break
+        response, delay = generate_response(user_input)
+        print(f"\n🤖 ক্ষমা [{delay:.2f}s]: {response}")
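The refactored agent.py depends on two helper modules, memory.py and web_search_helper.py, that this commit does not touch. The sketch below shows the interface the new code assumes of them: the method names, the top_k parameter, the llm_function keyword, and the 'summary' key come from the calls in the diff, while the signatures, return shapes, and docstrings are illustrative assumptions, not the actual implementations.

# Interface sketch only: inferred from the calls in agent.py, not the real memory.py / web_search_helper.py.

class Memory:
    def query(self, text: str, top_k: int = 3) -> list[str]:
        """Return up to top_k stored snippets relevant to text (return type assumed)."""
        raise NotImplementedError

    def add(self, item: str) -> None:
        """Persist a new snippet; agent.py calls this when it sees a ##MEM:add("...") directive."""
        raise NotImplementedError


class WebSearchHelper:
    def search_duckduckgo(self, query: str) -> list[str]:
        """Return result URLs for the query (return type assumed)."""
        raise NotImplementedError

    def crawl_and_summarize(self, urls: list[str], llm_function) -> list[dict]:
        """Fetch each URL and summarize it with llm_function; each entry carries a 'summary' key."""
        raise NotImplementedError

    def add_to_kb(self, summaries: list[dict]) -> None:
        """Store the summaries in the persistent knowledgebase."""
        raise NotImplementedError

    def query_kb(self, query: str, top_k: int = 3) -> tuple[object, list[dict]]:
        """Return a pair whose second element is a list of dicts with a 'summary' key,
        matching the `_, kb_hits = searcher.query_kb(...)` unpacking in agent.py."""
        raise NotImplementedError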