"""Interactive command-line assistant ("Kshama") backed by a local Llama model.

On import this module loads the tokenizer and a text-generation pipeline for
``meta-llama/Llama-3.2-1B-Instruct`` and creates one ``Memory`` and one
``WebSearchHelper`` instance. ``generate_response`` answers a single user
message; running the file as a script starts a read-eval-print loop.
"""
import re
import time
from typing import Optional

# NOTE(review): `time` and `torch` are used below, but their original import
# lines sit above the visible part of the file; they are imported explicitly
# here so the module is self-contained.
import torch
from transformers import pipeline, AutoTokenizer

from memory import Memory
from web_search_helper import WebSearchHelper

# Start of import-time work; used to report how long start-up took.
begin_time = time.time()

# === Initialize model + tokenizer ===
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    pad_token_id=128001  # Prevents warning spam
)

# === Core components ===
memory = Memory()
searcher = WebSearchHelper()

# === System behavior prompt ===
# NOTE(review): blank lines between the sections are reconstructed; confirm
# against the committed file if exact prompt whitespace matters.
SYSTEM_PROMPT = """
You are ক্ষমা (Kshama), Abu's personal AI assistant. You are insightful, methodical, and intentional.
Capabilities:
- Recall useful information from persistent memory.
- Decide when a web search is truly necessary.
- Summarize web content when requested using clear language.

Protocols:
- To store new memory: ##MEM:add("...")
- To request search: ##SEARCH:yes
- If no search is needed: ##SEARCH:no

Be precise and only initiate web search when memory is insufficient. Don't guess. Use memory and web knowledge actively.
"""

# Matches ##MEM:add("...") / ##MEM:add('...'); the quoted form lets the stored
# text itself contain ")" without being cut short.
_MEM_QUOTED = re.compile(r"##MEM:add\(\s*([\"'])(.*?)\1\s*\)", re.DOTALL)
# Fallback for an unquoted (or oddly quoted) argument: everything up to the
# first closing parenthesis.
_MEM_BARE = re.compile(r"##MEM:add\(([^)]*)\)")


def _generate(prompt: str, **gen_kwargs) -> str:
    """Run the pipeline on *prompt* and return only the newly generated text.

    ``return_full_text=False`` asks the pipeline to leave the prompt out of
    ``generated_text``, so callers never have to strip it themselves.
    """
    output = pipe(prompt, return_full_text=False, **gen_kwargs)
    return output[0]["generated_text"].strip()


def _parse_search_decision(reply: str) -> bool:
    """Return True when the first non-empty line of *reply* is ``##SEARCH:yes``.

    The comparison is case-insensitive and tolerates trailing text on that
    line. An empty reply counts as "no search".
    """
    for line in reply.splitlines():
        candidate = line.strip().lower()
        if candidate:
            return candidate.startswith("##search:yes")
    return False


def _extract_memory_directive(response: str) -> Optional[str]:
    """Return the text inside the first ``##MEM:add(...)`` directive, or None."""
    quoted = _MEM_QUOTED.search(response)
    if quoted:
        return quoted.group(2).strip()
    bare = _MEM_BARE.search(response)
    if bare:
        return bare.group(1).strip().strip("\"'").strip()
    return None


# === Summarizer using same model ===
def summarize_with_llama(text: str) -> str:
    """Return a brief model-written summary of *text*.

    Blank input yields an empty summary without calling the model.
    """
    if not text or not text.strip():
        return ""
    prompt = f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
    return _generate(prompt, max_new_tokens=256)


# === Check if agent requests web search ===
def should_search(user_input: str, mem_text: str, kb_text: str) -> bool:
    """Ask the model whether a web search is needed to answer *user_input*.

    Args:
        user_input: The user's message.
        mem_text: Memory hits already formatted as text ('' when there are none).
        kb_text: Knowledge-base summaries formatted as text ('' when none).

    Returns:
        True only when the model's reply starts with ``##SEARCH:yes``.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"User asked: {user_input}"},
        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
        {"role": "user", "content": "Should you search the web to answer this? Reply with ##SEARCH:yes or ##SEARCH:no only on the first line."}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Greedy decoding keeps the yes/no decision deterministic.
    reply = _generate(prompt, max_new_tokens=16, do_sample=False)
    return _parse_search_decision(reply)


# === Main agent response handler ===
def generate_response(user_input: str):
    """Answer *user_input*, searching the web and storing memory when asked to.

    Steps: query memory and the knowledge base, let the model decide whether a
    web search is needed (and refresh the knowledge base if so), generate the
    final answer, then store the first ``##MEM:add(...)`` directive found in it.

    Returns:
        A ``(response, elapsed)`` tuple: the generated answer text and the
        seconds spent on the final generation call only.
    """
    # Step 1: Retrieve memory and knowledgebase
    mem_hits = memory.query(user_input, top_k=3)
    mem_text = "\n".join(f"- {m}" for m in mem_hits)

    _, kb_hits = searcher.query_kb(user_input, top_k=3)
    kb_text = "\n".join(f"- {k['summary']}" for k in kb_hits)

    # Step 2: Ask if search is needed
    if should_search(user_input, mem_text, kb_text):
        print("[🌐 Search Triggered]")
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarize_with_llama)
        searcher.add_to_kb(summaries)
        # Re-query so the answer sees the freshly added summaries.
        _, kb_hits = searcher.query_kb(user_input)
        kb_text = "\n".join(f"- {k['summary']}" for k in kb_hits)
    else:
        print("[🔒 Search Skipped]")

    # Step 3: Generate final answer
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
        {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"}
    ]
    full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    start = time.time()
    response = _generate(full_prompt, max_new_tokens=512)
    elapsed = time.time() - start

    # Step 4: Store memory if requested
    if "##MEM:add(" in response:
        content = _extract_memory_directive(response)
        if not content:
            print("[⚠️ Could not parse memory]: no content found in directive")
        else:
            try:
                memory.add(content)
                print("[✅ Memory Added]")
            except Exception as e:
                # Best effort: a failed memory write must not lose the answer.
                print(f"[⚠️ Could not parse memory]: {e}")

    return response, elapsed


def main() -> None:
    """Run the interactive loop until the user types 'exit'/'quit' or input ends."""
    print(f"🚀 Kshama ready in {time.time() - begin_time:.2f}s")
    print("👋 Hello, Abu. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("\n🧑 You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave cleanly instead of a traceback.
            print("\n👋 Farewell.")
            break
        command = user_input.strip().lower()
        if command in ["exit", "quit"]:
            print("👋 Farewell.")
            break
        if not command:
            # Nothing to answer; do not spend a model call on blank input.
            continue
        response, delay = generate_response(user_input)
        print(f"\n🤖 ক্ষমা [{delay:.2f}s]: {response}")


# === Main loop ===
if __name__ == "__main__":
    main()