From 4b5921d82935c08ed6cc699d3bf92bc92bcb0c54 Mon Sep 17 00:00:00 2001
From: sufian
Date: Sun, 29 Jun 2025 20:49:04 +0600
Subject: [PATCH] Initial model codes

---
 .gitignore           |   4 ++
 agent.py             | 108 +++++++++++++++++++++++++++++++++++++++++++
 llama-3.2.py         |  26 +++++++++++
 llm_wrapper.py       |  21 +++++++++
 memory.py            |  39 ++++++++++++++++
 minimax-m1.py        |  10 ++++
 qwen.py              |   9 ++++
 test_memory.py       |  18 ++++++++
 timed_chat.py        |  41 ++++++++++++++++
 web_search_helper.py |  89 +++++++++++++++++++++++++++++++++++
 10 files changed, 365 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 agent.py
 create mode 100644 llama-3.2.py
 create mode 100644 llm_wrapper.py
 create mode 100644 memory.py
 create mode 100644 minimax-m1.py
 create mode 100644 qwen.py
 create mode 100644 test_memory.py
 create mode 100644 timed_chat.py
 create mode 100644 web_search_helper.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dee6d77
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.venv/
+*.index
+*.pkl
+.idea/
\ No newline at end of file
diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..b941c6f
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,108 @@
+import time
+begin_time = time.time()
+
+import torch
+from transformers import pipeline, AutoTokenizer
+from memory import Memory
+from web_search_helper import WebSearchHelper
+from llm_wrapper import LlmWrapper
+
+# Initialize components
+memory = Memory()
+searcher = WebSearchHelper()
+summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B")  # optional, could summarize search results
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
+# Load your main LLM (Llama 3.2 1B Instruct)
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+
+# Define system prompt and Kshama's capabilities
+SYSTEM_PROMPT = """
+You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
+You maintain and query a persistent memory of past interactions and facts via a vector store.
+You can:
+1. Recall relevant knowledge from memory using semantic similarity.
+2. Add new insights to memory when useful.
+3. Perform live web searches and summarize results if memory is insufficient.
+Structure your outputs clearly:
+- Use ##MEM:add(...) to store thoughts to memory.
+- Use ##MEM:recall(...) to request a lookup (already handled externally).
+- Use ##SEARCH:trigger(...) when memory lacks the answer.
+Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
+""" + +def generate_response(user_input: str): + # Step 1: Recall relevant memory + recalled = memory.query(user_input, top_k=3) + memory_context = "\n".join([f"- {item}" for item in recalled]) + + # Step 2: Evaluate recall quality + should_search = searcher.should_trigger_search(text=user_input) + kb_hits = "" + if should_search: + urls = searcher.search_duckduckgo(user_input) + summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize) + searcher.add_to_kb(summaries) + _, hits = searcher.query_kb(user_input) + kb_hits = "\n".join([f"- {h['summary']}" for h in hits]) + else: + _, hits = searcher.query_kb(user_input) + kb_hits = "\n".join([f"- {h['summary']}" for h in hits]) + + # Step 3: Compose structured messages + context_block = f"""Known facts from memory: + {memory_context or '[None]'} + + External knowledge from web: + {kb_hits or '[None]'} + """ + + messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"}, + ] + + # Convert using chat template + prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True # appends assistant tag if needed + ) + + # Step 4: Call the model + output = pipe( + prompt, + max_new_tokens=512, + do_sample=True, + temperature=0.7, + ) + + # Step 5: Process model output (add to memory if marked) + response = output[0]["generated_text"].strip() + + if "##MEM:add(" in response: + try: + content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'') + memory.add(content) + print("[✅ Memory Added]") + except: + print("[⚠️ Couldn't parse memory add]") + + return response + + +# 💬 REPL for testing +if __name__ == "__main__": + print(f"Time elapsed: {time.time() - begin_time:.2f} seconds") + print("👋 Welcome to Kshama. 
+    while True:
+        user_input = input("\n🧑 You: ")
+        if user_input.strip().lower() in ["exit", "quit"]: break
+        response = generate_response(user_input)
+        print(f"\n🤖 ক্ষমা: {response}")
diff --git a/llama-3.2.py b/llama-3.2.py
new file mode 100644
index 0000000..f39ea82
--- /dev/null
+++ b/llama-3.2.py
@@ -0,0 +1,26 @@
+import time
+begin_time = time.time()
+
+import torch
+from transformers import pipeline
+
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": "Who are you?"},
+]
+outputs = pipe(
+    messages,
+    max_new_tokens=256,
+)
+
+print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
+
+#print(outputs[0]["generated_text"][-1])
+print(outputs)
diff --git a/llm_wrapper.py b/llm_wrapper.py
new file mode 100644
index 0000000..891ab9d
--- /dev/null
+++ b/llm_wrapper.py
@@ -0,0 +1,21 @@
+from transformers import pipeline
+
+class LlmWrapper:
+    def __init__(self, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
+        self.model_name = model_name
+        self.pipe = pipeline("text-generation", model=model_name)
+        self.max_tokens = max_new_tokens
+
+    def summarize(self, text: str, prompt_template=None) -> str:
+        # Default to a lightweight summarization instruction
+        prompt = (
+            prompt_template or
+            f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
+        )
+
+        messages = [{"role": "user", "content": prompt}]
+        try:
+            outputs = self.pipe(messages, max_new_tokens=self.max_tokens)
+            # Chat-style input returns the full message list; take the assistant reply's content
+            return outputs[0]["generated_text"][-1]["content"].strip()
+        except Exception as e:
+            return f"[LLM ERROR]: {e}"
diff --git a/memory.py b/memory.py
new file mode 100644
index 0000000..f58a746
--- /dev/null
+++ b/memory.py
@@ -0,0 +1,39 @@
+import faiss
+import os
+import pickle
+from sentence_transformers import SentenceTransformer
+
+# Embedder: small model is fast & good enough
+embedder = SentenceTransformer("all-MiniLM-L6-v2")  # Replaceable
+
+class Memory:
+    def __init__(self, index_path="kshama.index", metadata_path="memory_meta.pkl"):
+        self.index_path = index_path
+        self.metadata_path = metadata_path
+        self.index = None
+        self.metadata = []
+        self._load()
+
+    def _load(self):
+        if os.path.exists(self.index_path):
+            self.index = faiss.read_index(self.index_path)
+            with open(self.metadata_path, "rb") as f:
+                self.metadata = pickle.load(f)
+        else:
+            self.index = faiss.IndexFlatL2(384)  # Depends on embedder output dim
+
+    def add(self, text, tags=None):
+        vec = embedder.encode([text])
+        self.index.add(vec)
+        self.metadata.append({"text": text, "tags": tags or []})
+        self._save()
+
+    def query(self, text, top_k=5):
+        vec = embedder.encode([text])
+        D, I = self.index.search(vec, top_k)
+        # FAISS pads missing results with -1, so keep only valid metadata indices
+        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]
+
+    def _save(self):
+        faiss.write_index(self.index, self.index_path)
+        with open(self.metadata_path, "wb") as f:
+            pickle.dump(self.metadata, f)
diff --git a/minimax-m1.py b/minimax-m1.py
new file mode 100644
index 0000000..d09015a
--- /dev/null
+++ b/minimax-m1.py
@@ -0,0 +1,10 @@
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True)
+messages = [
+    {"role": "user", "content": "Who are you?"},
+]
+output = pipe(messages)
+
+print(output)
\ No newline at end of file
diff --git a/qwen.py b/qwen.py
new file mode 100644
index 0000000..99b413a
--- /dev/null
+++ b/qwen.py
@@ -0,0 +1,9 @@
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")
+messages = [
+    {"role": "user", "content": "Who are you?"},
+]
+output = pipe(messages)
+print(output[0]["generated_text"][-1])
\ No newline at end of file
diff --git a/test_memory.py b/test_memory.py
new file mode 100644
index 0000000..07e444a
--- /dev/null
+++ b/test_memory.py
@@ -0,0 +1,18 @@
+from memory import Memory
+
+# Initialize the memory store
+memory = Memory()
+
+# Add some sample thoughts
+memory.add("Abu prefers concise Bash scripts for automating Exopid backups.", tags=["Abu", "preference"])
+memory.add("Kshama is the name of Abu's AI agent.", tags=["identity", "name"])
+memory.add("Abu is exploring GANs and TensorFlow with RTX 3050.", tags=["tech", "gpu", "project"])
+
+# Query the memory
+query = "What tools is Abu experimenting with?"
+results = memory.query(query)
+
+# Display results
+print("\n🔎 Query Results:")
+for i, entry in enumerate(results, 1):
+    print(f"{i}. {entry}")
diff --git a/timed_chat.py b/timed_chat.py
new file mode 100644
index 0000000..4c6ff77
--- /dev/null
+++ b/timed_chat.py
@@ -0,0 +1,41 @@
+import time
+load_start_time = time.time()
+
+from transformers import pipeline
+
+# Initialize your model
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype="auto",  # or torch.bfloat16 if your GPU supports it
+    device_map="auto",
+    pad_token_id=128001  # same as eos_token_id
+)
+
+# System prompt (optional)
+SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."
+
+print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
+print("👋 Kshama is listening. Type 'exit' to quit.\n")
+
+while True:
+    user_input = input("🧑 You: ")
+    if user_input.strip().lower() == "exit":
+        print("👋 Goodbye!")
+        break
+
+    # Simplified hand-rolled prompt; Llama 3.2's real chat template (tokenizer.apply_chat_template) uses different tags
+    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"
+
+    start_time = time.time()
+    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+    elapsed = time.time() - start_time
+
+    if isinstance(output[0], dict) and "generated_text" in output[0]:
+        response = output[0]["generated_text"].replace(prompt, "").strip()
+    elif isinstance(output[0], str):
+        response = output[0].replace(prompt, "").strip()
+    else:
+        response = str(output[0])
+
+    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")
diff --git a/web_search_helper.py b/web_search_helper.py
new file mode 100644
index 0000000..8c190ee
--- /dev/null
+++ b/web_search_helper.py
@@ -0,0 +1,89 @@
+import requests
+import faiss
+import pickle
+import os
+from bs4 import BeautifulSoup
+from sentence_transformers import SentenceTransformer
+from urllib.parse import quote, parse_qs, urlparse, unquote
+
+class WebSearchHelper:
+    def __init__(self, kb_path="web_kb.index", meta_path="web_kb_meta.pkl"):
+        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        self.kb_path = kb_path
+        self.meta_path = meta_path
+        self.meta = []
+        self.index = None
+        self._load_index()
+
+    def _load_index(self):
+        if os.path.exists(self.kb_path):
+            self.index = faiss.read_index(self.kb_path)
+            with open(self.meta_path, "rb") as f:
+                self.meta = pickle.load(f)
+        else:
+            self.index = faiss.IndexFlatL2(384)
+
+    def _save_index(self):
+        faiss.write_index(self.index, self.kb_path)
+        with open(self.meta_path, "wb") as f:
+            pickle.dump(self.meta, f)
+
+    def search_duckduckgo(self, query, num=5):
+        results = []
+        q = quote(query)
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        url = f"https://lite.duckduckgo.com/lite?q={q}"
+        print(url)
+        res = requests.get(url, headers=headers)
+        soup = BeautifulSoup(res.text, "html.parser")
+        links = soup.find_all("a", href=True)
+        for link in links[:num]:
+            parsed = urlparse(link['href'])
+            if parsed.path.startswith("/l/"):
+                # DuckDuckGo redirect links carry the real target in the 'uddg' query parameter
+                qs = parse_qs(parsed.query)
+                actual_url = unquote(qs.get("uddg", [""])[0])
+                if actual_url:
+                    results.append(actual_url)
+            #results.append(link['href'])
+        return results
+
+    def crawl_and_summarize(self, urls, llm_function):
+        summaries = []
+        for url in urls:
+            try:
+                print(f"[crawling] {url}")
+                html = requests.get(url, timeout=5).text
+                text = BeautifulSoup(html, "html.parser").get_text()
+                clean = ' '.join(text.strip().split()[:1000])  # truncate
+                summary = llm_function(clean)
+                summaries.append((url, summary))
+            except Exception as e:
+                print(f"[crawl error] {url} -> {e}")
+        return summaries
+
+    def add_to_kb(self, summaries):
+        for url, content in summaries:
+            vec = self.embedder.encode([content])
+            self.index.add(vec)
+            self.meta.append({"url": url, "summary": content})
+        self._save_index()
+
+    def query_kb(self, text, top_k=3):
+        if self.index.ntotal == 0:
+            return [], []
+        vec = self.embedder.encode([text])
+        D, I = self.index.search(vec, top_k)
+        results = [self.meta[i] for i in I[0] if i < len(self.meta)]
+        return D[0], results
+
+    def should_trigger_search(self, score_threshold=0.7, text=""):
+        if self.index.ntotal == 0:
+            return True
+
+        scores, _ = self.query_kb(text, top_k=1)
+        if len(scores) == 0:
+            return True
+
+        # L2 distance of the nearest KB hit: search again if it is far (> 1.0) or extremely close (< 1 - score_threshold)
+        return scores[0] > 1.0 or scores[0] < (1 - score_threshold)
+
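+
+# Minimal usage sketch: assumes live network access to lite.duckduckgo.com, and a naive
+# word-truncating function stands in for the real LLM summarizer (LlmWrapper.summarize),
+# so this only smoke-tests the search -> crawl -> KB -> query flow on its own.
+if __name__ == "__main__":
+    helper = WebSearchHelper()
+
+    def naive_summarize(text: str) -> str:
+        # Hypothetical stand-in for a real summarizer: keep the first ~50 words
+        return " ".join(text.split()[:50])
+
+    question = "What is FAISS used for?"
+    if helper.should_trigger_search(text=question):
+        urls = helper.search_duckduckgo(question, num=3)
+        found = helper.crawl_and_summarize(urls, llm_function=naive_summarize)
+        helper.add_to_kb(found)
+
+    distances, hits = helper.query_kb(question)
+    for dist, hit in zip(distances, hits):
+        print(f"[{dist:.3f}] {hit['url']}\n    {hit['summary'][:120]}")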