From 4b5921d82935c08ed6cc699d3bf92bc92bcb0c54 Mon Sep 17 00:00:00 2001
From: sufian
Date: Sun, 29 Jun 2025 20:49:04 +0600
Subject: [PATCH] Initial model codes

---
 .gitignore           |   4 ++
 agent.py             | 108 +++++++++++++++++++++++++++++++++++++++++++
 llama-3.2.py         |  26 +++++++++++
 llm_wrapper.py       |  21 +++++++++
 memory.py            |  39 ++++++++++++++++
 minimax-m1.py        |  10 ++++
 qwen.py              |   9 ++++
 test_memory.py       |  18 ++++++++
 timed_chat.py        |  41 ++++++++++++++++
 web_search_helper.py |  89 +++++++++++++++++++++++++++++++++++
 10 files changed, 365 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 agent.py
 create mode 100644 llama-3.2.py
 create mode 100644 llm_wrapper.py
 create mode 100644 memory.py
 create mode 100644 minimax-m1.py
 create mode 100644 qwen.py
 create mode 100644 test_memory.py
 create mode 100644 timed_chat.py
 create mode 100644 web_search_helper.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dee6d77
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.venv/
+*.index
+*.pkl
+.idea/
\ No newline at end of file
diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..b941c6f
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,108 @@
+import time
+begin_time = time.time()
+
+import torch
+from transformers import pipeline, AutoTokenizer
+from memory import Memory
+from web_search_helper import WebSearchHelper
+from llm_wrapper import LlmWrapper
+
+# Initialize components
+memory = Memory()
+searcher = WebSearchHelper()
+summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B")  # optional, could summarize search results
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
+# Load your main LLM (Llama 3.2 1B Instruct)
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+
+# Define system prompt and Kshama's capabilities
+SYSTEM_PROMPT = """
+You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
+You maintain and query a persistent memory of past interactions and facts via a vector store.
+You can:
+1. Recall relevant knowledge from memory using semantic similarity.
+2. Add new insights to memory when useful.
+3. Perform live web searches and summarize results if memory is insufficient.
+Structure your outputs clearly:
+- Use ##MEM:add(...) to store thoughts to memory.
+- Use ##MEM:recall(...) to request a lookup (already handled externally).
+- Use ##SEARCH:trigger(...) when memory lacks the answer.
+Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
+""" + +def generate_response(user_input: str): + # Step 1: Recall relevant memory + recalled = memory.query(user_input, top_k=3) + memory_context = "\n".join([f"- {item}" for item in recalled]) + + # Step 2: Evaluate recall quality + should_search = searcher.should_trigger_search(text=user_input) + kb_hits = "" + if should_search: + urls = searcher.search_duckduckgo(user_input) + summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize) + searcher.add_to_kb(summaries) + _, hits = searcher.query_kb(user_input) + kb_hits = "\n".join([f"- {h['summary']}" for h in hits]) + else: + _, hits = searcher.query_kb(user_input) + kb_hits = "\n".join([f"- {h['summary']}" for h in hits]) + + # Step 3: Compose structured messages + context_block = f"""Known facts from memory: + {memory_context or '[None]'} + + External knowledge from web: + {kb_hits or '[None]'} + """ + + messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"}, + ] + + # Convert using chat template + prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True # appends assistant tag if needed + ) + + # Step 4: Call the model + output = pipe( + prompt, + max_new_tokens=512, + do_sample=True, + temperature=0.7, + ) + + # Step 5: Process model output (add to memory if marked) + response = output[0]["generated_text"].strip() + + if "##MEM:add(" in response: + try: + content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'') + memory.add(content) + print("[✅ Memory Added]") + except: + print("[⚠️ Couldn't parse memory add]") + + return response + + +# 💬 REPL for testing +if __name__ == "__main__": + print(f"Time elapsed: {time.time() - begin_time:.2f} seconds") + print("👋 Welcome to Kshama. 
+    while True:
+        user_input = input("\n🧑 You: ")
+        if user_input.strip().lower() in ["exit", "quit"]: break
+        response = generate_response(user_input)
+        print(f"\n🤖 ক্ষমা: {response}")
diff --git a/llama-3.2.py b/llama-3.2.py
new file mode 100644
index 0000000..f39ea82
--- /dev/null
+++ b/llama-3.2.py
@@ -0,0 +1,26 @@
+import time
+begin_time = time.time()
+
+import torch
+from transformers import pipeline
+
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": "Who are you?"},
+]
+outputs = pipe(
+    messages,
+    max_new_tokens=256,
+)
+
+print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
+
+#print(outputs[0]["generated_text"][-1])
+print(outputs)
diff --git a/llm_wrapper.py b/llm_wrapper.py
new file mode 100644
index 0000000..891ab9d
--- /dev/null
+++ b/llm_wrapper.py
@@ -0,0 +1,21 @@
+from transformers import pipeline
+
+class LlmWrapper:
+    def __init__(self, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
+        self.model_name = model_name
+        self.pipe = pipeline("text-generation", model=model_name)
+        self.max_tokens = max_new_tokens
+
+    def summarize(self, text: str, prompt_template=None) -> str:
+        # Default to a lightweight summarization instruction
+        prompt = (
+            prompt_template or
+            f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
+        )
+
+        messages = [{"role": "user", "content": prompt}]
+        try:
+            outputs = self.pipe(messages, max_new_tokens=self.max_tokens)
+            # Chat-style input returns the full message list; take the assistant reply's content
+            return outputs[0]["generated_text"][-1]["content"].strip()
+        except Exception as e:
+            return f"[LLM ERROR]: {e}"
diff --git a/memory.py b/memory.py
new file mode 100644
index 0000000..f58a746
--- /dev/null
+++ b/memory.py
@@ -0,0 +1,39 @@
+import faiss
+import os
+import pickle
+from sentence_transformers import SentenceTransformer
+
+# Embedder: small model is fast & good enough
+embedder = SentenceTransformer("all-MiniLM-L6-v2")  # Replaceable
+
+class Memory:
+    def __init__(self, index_path="kshama.index", metadata_path="memory_meta.pkl"):
+        self.index_path = index_path
+        self.metadata_path = metadata_path
+        self.index = None
+        self.metadata = []
+        self._load()
+
+    def _load(self):
+        if os.path.exists(self.index_path):
+            self.index = faiss.read_index(self.index_path)
+            with open(self.metadata_path, "rb") as f:
+                self.metadata = pickle.load(f)
+        else:
+            self.index = faiss.IndexFlatL2(384)  # Depends on embedder output dim
+
+    def add(self, text, tags=None):
+        vec = embedder.encode([text])
+        self.index.add(vec)
+        self.metadata.append({"text": text, "tags": tags or []})
+        self._save()
+
+    def query(self, text, top_k=5):
+        vec = embedder.encode([text])
+        D, I = self.index.search(vec, top_k)
+        # FAISS pads missing results with -1, so keep only valid metadata indices
+        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]
+
+    def _save(self):
+        faiss.write_index(self.index, self.index_path)
+        with open(self.metadata_path, "wb") as f:
+            pickle.dump(self.metadata, f)
diff --git a/minimax-m1.py b/minimax-m1.py
new file mode 100644
index 0000000..d09015a
--- /dev/null
+++ b/minimax-m1.py
@@ -0,0 +1,10 @@
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True)
+messages = [
+    {"role": "user", "content": "Who are you?"},
+]
+output = pipe(messages)
+
+print(output)
\ No newline at end of file
diff --git a/qwen.py b/qwen.py
new file mode 100644
index 0000000..99b413a
--- /dev/null
+++ b/qwen.py
@@ -0,0 +1,9 @@
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")
+messages = [
+    {"role": "user", "content": "Who are you?"},
+]
+output = pipe(messages)
+print(output[0]["generated_text"][-1])
\ No newline at end of file
diff --git a/test_memory.py b/test_memory.py
new file mode 100644
index 0000000..07e444a
--- /dev/null
+++ b/test_memory.py
@@ -0,0 +1,18 @@
+from memory import Memory
+
+# Initialize the memory store
+memory = Memory()
+
+# Add some sample thoughts
+memory.add("Abu prefers concise Bash scripts for automating Exopid backups.", tags=["Abu", "preference"])
+memory.add("Kshama is the name of Abu's AI agent.", tags=["identity", "name"])
+memory.add("Abu is exploring GANs and TensorFlow with RTX 3050.", tags=["tech", "gpu", "project"])
+
+# Query the memory
+query = "What tools is Abu experimenting with?"
+results = memory.query(query)
+
+# Display results
+print("\n🔎 Query Results:")
+for i, entry in enumerate(results, 1):
+    print(f"{i}. {entry}")
diff --git a/timed_chat.py b/timed_chat.py
new file mode 100644
index 0000000..4c6ff77
--- /dev/null
+++ b/timed_chat.py
@@ -0,0 +1,41 @@
+import time
+load_start_time = time.time()
+
+from transformers import pipeline
+
+# Initialize your model
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype="auto",  # or torch.bfloat16 if your GPU supports it
+    device_map="auto",
+    pad_token_id=128001  # same as eos_token_id
+)
+
+# System prompt (optional)
+SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."
+
+print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
+print("👋 Kshama is listening. Type 'exit' to quit.\n")
+
+while True:
+    user_input = input("🧑 You: ")
+    if user_input.strip().lower() == "exit":
+        print("👋 Goodbye!")
+        break
+
+    # Simplified hand-rolled prompt; Llama 3.2's real chat template (tokenizer.apply_chat_template) uses different tags
+    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"
+
+    start_time = time.time()
+    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+    elapsed = time.time() - start_time
+
+    if isinstance(output[0], dict) and "generated_text" in output[0]:
+        response = output[0]["generated_text"].replace(prompt, "").strip()
+    elif isinstance(output[0], str):
+        response = output[0].replace(prompt, "").strip()
+    else:
+        response = str(output[0])
+
+    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")
diff --git a/web_search_helper.py b/web_search_helper.py
new file mode 100644
index 0000000..8c190ee
--- /dev/null
+++ b/web_search_helper.py
@@ -0,0 +1,89 @@
+import requests
+import faiss
+import pickle
+import os
+from bs4 import BeautifulSoup
+from sentence_transformers import SentenceTransformer
+from urllib.parse import quote, parse_qs, urlparse, unquote
+
+class WebSearchHelper:
+    def __init__(self, kb_path="web_kb.index", meta_path="web_kb_meta.pkl"):
+        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        self.kb_path = kb_path
+        self.meta_path = meta_path
+        self.meta = []
+        self.index = None
+        self._load_index()
+
+    def _load_index(self):
+        if os.path.exists(self.kb_path):
+            self.index = faiss.read_index(self.kb_path)
+            with open(self.meta_path, "rb") as f:
+                self.meta = pickle.load(f)
+        else:
+            self.index = faiss.IndexFlatL2(384)
+
+    def _save_index(self):
+        faiss.write_index(self.index, self.kb_path)
+        with open(self.meta_path, "wb") as f:
+            pickle.dump(self.meta, f)
+
+    def search_duckduckgo(self, query, num=5):
+        results = []
+        q = quote(query)
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        url = f"https://lite.duckduckgo.com/lite?q={q}"
+        print(url)
+        res = requests.get(url, headers=headers)
+        soup = BeautifulSoup(res.text, "html.parser")
+        links = soup.find_all("a", href=True)
+        for link in links[:num]:
+            parsed = urlparse(link['href'])
+            if parsed.path.startswith("/l/"):
+                # DuckDuckGo redirect links carry the real target in the 'uddg' query parameter
+                qs = parse_qs(parsed.query)
+                actual_url = unquote(qs.get("uddg", [""])[0])
+                if actual_url:
+                    results.append(actual_url)
+            #results.append(link['href'])
+        return results
+
+    def crawl_and_summarize(self, urls, llm_function):
+        summaries = []
+        for url in urls:
+            try:
+                print(f"[crawling] {url}")
+                html = requests.get(url, timeout=5).text
+                text = BeautifulSoup(html, "html.parser").get_text()
+                clean = ' '.join(text.strip().split()[:1000])  # truncate
+                summary = llm_function(clean)
+                summaries.append((url, summary))
+            except Exception as e:
+                print(f"[crawl error] {url} -> {e}")
+        return summaries
+
+    def add_to_kb(self, summaries):
+        for url, content in summaries:
+            vec = self.embedder.encode([content])
+            self.index.add(vec)
+            self.meta.append({"url": url, "summary": content})
+        self._save_index()
+
+    def query_kb(self, text, top_k=3):
+        if self.index.ntotal == 0:
+            return [], []
+        vec = self.embedder.encode([text])
+        D, I = self.index.search(vec, top_k)
+        results = [self.meta[i] for i in I[0] if i < len(self.meta)]
+        return D[0], results
+
+    def should_trigger_search(self, score_threshold=0.7, text=""):
+        if self.index.ntotal == 0:
+            return True
+
+        scores, _ = self.query_kb(text, top_k=1)
+        if len(scores) == 0:
+            return True
+
+        # L2 distance of the nearest KB hit: search again if it is far (> 1.0) or extremely close (< 1 - score_threshold)
+        return scores[0] > 1.0 or scores[0] < (1 - score_threshold)
+
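+
+# Minimal usage sketch: assumes live network access to lite.duckduckgo.com, and a naive
+# word-truncating function stands in for the real LLM summarizer (LlmWrapper.summarize),
+# so this only smoke-tests the search -> crawl -> KB -> query flow on its own.
+if __name__ == "__main__":
+    helper = WebSearchHelper()
+
+    def naive_summarize(text: str) -> str:
+        # Hypothetical stand-in for a real summarizer: keep the first ~50 words
+        return " ".join(text.split()[:50])
+
+    question = "What is FAISS used for?"
+    if helper.should_trigger_search(text=question):
+        urls = helper.search_duckduckgo(question, num=3)
+        found = helper.crawl_and_summarize(urls, llm_function=naive_summarize)
+        helper.add_to_kb(found)
+
+    distances, hits = helper.query_kb(question)
+    for dist, hit in zip(distances, hits):
+        print(f"[{dist:.3f}] {hit['url']}\n    {hit['summary'][:120]}")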