Refactor and enhance system logic and memory handling.
Streamline system prompts, rename functions for clarity, and improve search query generation. Fix memory query edge cases and enhance robustness when no indexed data exists. Minor wording adjustments and structure improvements for better maintainability.
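The "no indexed data" fix below guards FAISS lookups on an empty store: when an index holds fewer than top_k vectors, FAISS pads the missing neighbours with id -1, so an unguarded metadata lookup can alias the last entry or raise. A minimal standalone sketch of that failure mode (the 384-dim embedding size and variable names are illustrative assumptions, not taken from this repo):

import faiss
import numpy as np

dim = 384                                   # assumed embedding size, for illustration only
index = faiss.IndexFlatL2(dim)              # empty index: index.ntotal == 0
query = np.random.rand(1, dim).astype("float32")

D, I = index.search(query, 3)               # nothing stored, so every result slot is padded
print(I)                                    # [[-1 -1 -1]] -> metadata[-1] without the new guard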
parent 54a84714db
commit 79e345097e
 agent.py | 81
@@ -6,7 +6,7 @@ from web_search_helper import WebSearchHelper
 
 begin_time = time.time()
 
-# === 🔧 Initialize model + tokenizer ===
+# === 🔧 Load model + tokenizer ===
 model_id = "meta-llama/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
@@ -15,63 +15,76 @@ pipe = pipeline(
     tokenizer=tokenizer,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    pad_token_id=128001  # Prevents warning spam
+    pad_token_id=128001
 )
 
-# === 🧠 Core components ===
+# === 🔌 Core modules ===
 memory = Memory()
 searcher = WebSearchHelper()
 
-# === 🧭 System behavior prompt ===
+# === 🧭 System behavior instruction ===
 SYSTEM_PROMPT = """
-You are ক্ষমা (Kshama), Abu's personal AI assistant. You are insightful, methodical, and intentional.
-Capabilities:
-- Recall useful information from persistent memory.
-- Decide when a web search is truly necessary.
-- Summarize web content when requested using clear language.
+You are personal AI assistant. You're wise, efficient, and intentional.
 
-Protocols:
-- To store new memory: ##MEM:add("...")
-- To request search: ##SEARCH:yes
-- If no search is needed: ##SEARCH:no
+You can:
+- Recall long-term memory and use it to answer.
+- Summarize long documents clearly.
+- Perform web search *only if you believe it's necessary*, and clearly state that with ##SEARCH:yes.
 
-Be precise and only initiate web search when memory is insufficient. Don't guess. Use memory and web knowledge actively.
+You also refine web search queries using what you understand of the user's intent.
+Always follow this format:
+- ##MEM:add("...") to add memories
+- ##SEARCH:yes or ##SEARCH:no on its own line to trigger or skip web search
+- After search: generate a clear answer, using memory and the retrieved summaries
 """
 
-# === 📝 Summarizer using same model ===
+# === 📘 Summarization using main model ===
 def summarize_with_llama(text: str) -> str:
-    prompt = f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
+    prompt = f"Summarize the following:\n\n{text.strip()}\n\nSummary:"
     output = pipe(prompt, max_new_tokens=256)
     return output[0]["generated_text"].replace(prompt, "").strip()
 
-# === 🔍 Check if agent requests web search ===
-def should_search(user_input: str, mem_text: str, kb_text: str) -> bool:
+# === 🔍 Ask if search is needed ===
+def ask_should_search(user_input, mem_text, kb_text):
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "user", "content": f"User asked: {user_input}"},
         {"role": "user", "content": f"Memory:\n{mem_text or '[None]'}"},
         {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"},
-        {"role": "user", "content": "Should you search the web to answer this? Reply with ##SEARCH:yes or ##SEARCH:no only on the first line."}
+        {"role": "user", "content": "Do you need to search the web to answer this? Reply ##SEARCH:yes or ##SEARCH:no on the first line only."}
     ]
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    output = pipe(prompt, max_new_tokens=16, do_sample=False)
+    output = pipe(prompt, max_new_tokens=16)
     reply = output[0]["generated_text"].strip().lower()
-    print(output)
-    return reply.splitlines()[0].strip() == "##SEARCH:yes"
+    return reply.splitlines()[0].strip().__contains__("##SEARCH:yes")
 
-# === 🧠 Main agent response handler ===
+# === ✍️ Compose better search query ===
+def compose_search_query(user_input, mem_text):
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"User asked: {user_input}"},
+        {"role": "user", "content": f"Relevant memory:\n{mem_text or '[None]'}"},
+        {"role": "user", "content": "Rewrite a concise web search query to find useful info. Output only the query string, nothing else."}
+    ]
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    output = pipe(prompt, max_new_tokens=32)
+    return output[0]["generated_text"].strip().splitlines()[0]
+
+# === 🧠 Main reasoning function ===
 def generate_response(user_input: str):
-    # Step 1: Retrieve memory and knowledgebase
+    # Step 1: Recall memory and web KB
     mem_hits = memory.query(user_input, top_k=3)
-    mem_text = "\n".join([f"- {m}" for m in mem_hits])
+    mem_text = "\n".join([f"- {x}" for x in mem_hits])
 
-    _, kb_hits = searcher.query_kb(user_input, top_k=3)
+    _, kb_hits = searcher.query_kb(user_input)
     kb_text = "\n".join([f"- {k['summary']}" for k in kb_hits])
 
-    # Step 2: Ask if search is needed
-    if should_search(user_input, mem_text, kb_text):
+    # Step 2: Ask model if search is truly required
+    if ask_should_search(user_input, mem_text, kb_text):
         print("[🌐 Search Triggered]")
-        urls = searcher.search_duckduckgo(user_input)
+        search_query = compose_search_query(user_input, mem_text)
+        print(f"[🔎 Composed Query] {search_query}")
+        urls = searcher.search_duckduckgo(search_query)
         summaries = searcher.crawl_and_summarize(urls, llm_function=summarize_with_llama)
         searcher.add_to_kb(summaries)
         _, kb_hits = searcher.query_kb(user_input)
@@ -79,7 +92,7 @@ def generate_response(user_input: str):
     else:
         print("[🔒 Search Skipped]")
 
-    # Step 3: Generate final answer
+    # Step 3: Final answer generation
     messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
@@ -87,31 +100,29 @@ def generate_response(user_input: str):
        {"role": "user", "content": f"Web Knowledge:\n{kb_text or '[None]'}"}
     ]
     full_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
     start = time.time()
     output = pipe(full_prompt, max_new_tokens=512)
     elapsed = time.time() - start
     response = output[0]["generated_text"].replace(full_prompt, "").strip()
 
-    # Step 4: Store memory if requested
     if "##MEM:add(" in response:
         try:
             content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
             memory.add(content)
             print("[✅ Memory Added]")
         except Exception as e:
-            print(f"[⚠️ Could not parse memory]: {e}")
+            print(f"[⚠️ Failed to add memory]: {e}")
 
     return response, elapsed
 
-# === 👂 Main loop ===
+# === 💬 REPL Loop ===
 if __name__ == "__main__":
     print(f"🚀 Kshama ready in {time.time() - begin_time:.2f}s")
     print("👋 Hello, Abu. Type 'exit' to quit.")
     while True:
         user_input = input("\n🧑 You: ")
         if user_input.strip().lower() in ["exit", "quit"]:
-            print("👋 Farewell.")
+            print("👋 Goodbye.")
             break
         response, delay = generate_response(user_input)
         print(f"\n🤖 ক্ষমা [{delay:.2f}s]: {response}")
@@ -29,9 +29,11 @@ class Memory:
         self._save()
 
     def query(self, text, top_k=5):
+        if self.index.ntotal == 0:
+            return []
         vec = embedder.encode([text])
         D, I = self.index.search(vec, top_k)
-        return [self.metadata[i]["text"] for i in I[0]]
+        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]
 
     def _save(self):
         faiss.write_index(self.index, self.index_path)
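With the ntotal guard and the bounds filter above, querying a store that has nothing indexed yet simply returns an empty list. A small usage sketch, assuming the class is importable from a module named memory (the import path is not shown in this diff):

from memory import Memory      # assumed module name; the diff only shows the class body

fresh = Memory()               # no-arg construction, as in agent.py
print(fresh.query("anything")) # -> [] instead of indexing into empty metadata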