Initial model codes
Commit 4b5921d829
.gitignore (vendored, Normal file, 4 lines added)
@@ -0,0 +1,4 @@
.venv/
*.index
*.pkl
.idea/
agent.py (Normal file, 108 lines added)
@@ -0,0 +1,108 @@
import time
begin_time = time.time()

import torch
from transformers import pipeline, AutoTokenizer
from memory import Memory
from web_search_helper import WebSearchHelper
from llm_wrapper import LlmWrapper

# Initialize components
memory = Memory()
searcher = WebSearchHelper()
summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B")  # optional, could summarize search results

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
# Load the main LLM (Llama 3.2 1B Instruct)
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Define system prompt and Kshama's capabilities
SYSTEM_PROMPT = """
You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
You maintain and query a persistent memory of past interactions and facts via a vector store.
You can:
1. Recall relevant knowledge from memory using semantic similarity.
2. Add new insights to memory when useful.
3. Perform live web searches and summarize results if memory is insufficient.
Structure your outputs clearly:
- Use ##MEM:add(...) to store thoughts to memory.
- Use ##MEM:recall(...) to request a lookup (already handled externally).
- Use ##SEARCH:trigger(...) when memory lacks the answer.
Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
"""


def generate_response(user_input: str):
    # Step 1: Recall relevant memory
    recalled = memory.query(user_input, top_k=3)
    memory_context = "\n".join([f"- {item}" for item in recalled])

    # Step 2: Evaluate recall quality and search the web if needed
    should_search = searcher.should_trigger_search(text=user_input)
    kb_hits = ""
    if should_search:
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize)
        searcher.add_to_kb(summaries)
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])
    else:
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])

    # Step 3: Compose structured messages
    context_block = f"""Known facts from memory:
{memory_context or '[None]'}

External knowledge from web:
{kb_hits or '[None]'}
"""

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"},
    ]

    # Convert using the chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # appends the assistant tag if needed
    )

    # Step 4: Call the model
    output = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the newly generated text, not the echoed prompt
    )

    # Step 5: Process model output (add to memory if marked)
    response = output[0]["generated_text"].strip()

    if "##MEM:add(" in response:
        try:
            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
            memory.add(content)
            print("[✅ Memory Added]")
        except Exception:
            print("[⚠️ Couldn't parse memory add]")

    return response


# 💬 REPL for testing
if __name__ == "__main__":
    print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
    print("👋 Welcome to Kshama. Type 'exit' to leave.")
    while True:
        user_input = input("\n🧑 You: ")
        if user_input.strip().lower() in ["exit", "quit"]:
            break
        response = generate_response(user_input)
        print(f"\n🤖 ক্ষমা: {response}")
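For reference, a minimal standalone sketch of how the ##MEM:add(...) marker described in SYSTEM_PROMPT gets parsed out of a reply; the sample reply string is invented for illustration:

# Hypothetical reply; real replies come from the Llama pipeline in agent.py.
sample_response = 'Noted! ##MEM:add("Abu prefers bfloat16 on his RTX 3050") I will remember that.'
if "##MEM:add(" in sample_response:
    # Same parsing as agent.py: take the text between the marker and the first closing parenthesis
    content = sample_response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
    print(content)  # Abu prefers bfloat16 on his RTX 3050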
llama-3.2.py (Normal file, 26 lines added)
@@ -0,0 +1,26 @@
import time
begin_time = time.time()

import torch
from transformers import pipeline

model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
outputs = pipe(
    messages,
    max_new_tokens=256,
)

print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")

# print(outputs[0]["generated_text"][-1])
print(outputs)
llm_wrapper.py (Normal file, 21 lines added)
@@ -0,0 +1,21 @@
from transformers import pipeline


class LlmWrapper:
    def __init__(self, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
        self.model_name = model_name
        self.pipe = pipeline("text-generation", model=model_name)
        self.max_tokens = max_new_tokens

    def summarize(self, text: str, prompt_template=None) -> str:
        # Default to a lightweight summarization instruction
        prompt = (
            prompt_template or
            f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
        )

        messages = [{"role": "user", "content": prompt}]
        try:
            outputs = self.pipe(messages, max_new_tokens=self.max_tokens)
            # With chat-style input, generated_text is the message list; the last entry is the assistant reply
            return outputs[0]["generated_text"][-1]["content"].strip()
        except Exception as e:
            return f"[LLM ERROR]: {e}"
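A minimal usage sketch of the wrapper (the input text is invented; the Qwen model downloads on first run):

from llm_wrapper import LlmWrapper

summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B", max_new_tokens=128)
sample_text = "FAISS is a library for efficient similarity search over dense vectors. " * 20
print(summarizer.summarize(sample_text))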
memory.py (Normal file, 39 lines added)
@@ -0,0 +1,39 @@
import faiss
import os
import pickle
from sentence_transformers import SentenceTransformer

# Embedder: a small model is fast & good enough
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # Replaceable


class Memory:
    def __init__(self, index_path="kshama.index", metadata_path="memory_meta.pkl"):
        self.index_path = index_path
        self.metadata_path = metadata_path
        self.index = None
        self.metadata = []
        self._load()

    def _load(self):
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            with open(self.metadata_path, "rb") as f:
                self.metadata = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)  # Depends on embedder output dim

    def add(self, text, tags=None):
        vec = embedder.encode([text])
        self.index.add(vec)
        self.metadata.append({"text": text, "tags": tags or []})
        self._save()

    def query(self, text, top_k=5):
        if self.index.ntotal == 0:
            return []  # nothing stored yet; avoid searching an empty index
        vec = embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        # FAISS pads missing results with -1 when fewer than top_k vectors exist
        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]

    def _save(self):
        faiss.write_index(self.index, self.index_path)
        with open(self.metadata_path, "wb") as f:
            pickle.dump(self.metadata, f)
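The hard-coded 384 matches all-MiniLM-L6-v2; a small sketch, assuming the embedder may later be swapped, of deriving the dimension from the model instead:

from sentence_transformers import SentenceTransformer
import faiss

embedder = SentenceTransformer("all-MiniLM-L6-v2")
dim = embedder.get_sentence_embedding_dimension()  # 384 for all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dim)  # stays in sync if the embedder changes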
minimax-m1.py (Normal file, 10 lines added)
@@ -0,0 +1,10 @@
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
output = pipe(messages)

print(output)
qwen.py (Normal file, 9 lines added)
@@ -0,0 +1,9 @@
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
output = pipe(messages)
print(output[0]["generated_text"][-1])
test_memory.py (Normal file, 18 lines added)
@@ -0,0 +1,18 @@
from memory import Memory

# Initialize the memory store
memory = Memory()

# Add some sample thoughts
memory.add("Abu prefers concise Bash scripts for automating Exopid backups.", tags=["Abu", "preference"])
memory.add("Kshama is the name of Abu's AI agent.", tags=["identity", "name"])
memory.add("Abu is exploring GANs and TensorFlow with RTX 3050.", tags=["tech", "gpu", "project"])

# Query the memory
query = "What tools is Abu experimenting with?"
results = memory.query(query)

# Display results
print("\n🔎 Query Results:")
for i, entry in enumerate(results, 1):
    print(f"{i}. {entry}")
timed_chat.py (Normal file, 41 lines added)
@@ -0,0 +1,41 @@
import time
load_start_time = time.time()

from transformers import pipeline

# Initialize the model
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype="auto",  # or torch.bfloat16 if your GPU supports it
    device_map="auto",
    pad_token_id=128001  # same as eos_token_id
)

# System prompt (optional)
SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."

print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
print("👋 Kshama is listening. Type 'exit' to quit.\n")

while True:
    user_input = input("🧑 You: ")
    if user_input.strip().lower() == "exit":
        print("👋 Goodbye!")
        break

    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"

    start_time = time.time()
    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    elapsed = time.time() - start_time

    if isinstance(output[0], dict) and "generated_text" in output[0]:
        response = output[0]["generated_text"].replace(prompt, "").strip()
    elif isinstance(output[0], str):
        response = output[0].replace(prompt, "").strip()
    else:
        response = str(output[0])

    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")
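The <|system|>/<|user|> tags above are an ad-hoc prompt format rather than Llama 3.2's own chat template; a sketch of building the prompt with the tokenizer's template instead, as agent.py already does (sample strings stand in for SYSTEM_PROMPT and user_input):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
messages = [
    {"role": "system", "content": "You are a helpful assistant. Keep responses brief and clear."},
    {"role": "user", "content": "Hello"},
]
# Emits Llama 3.x's native header tokens instead of the ad-hoc tags
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)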
web_search_helper.py (Normal file, 89 lines added)
@@ -0,0 +1,89 @@
import requests
import faiss
import pickle
import os
from bs4 import BeautifulSoup
from urllib.parse import quote
from sentence_transformers import SentenceTransformer
from urllib.parse import parse_qs, urlparse, unquote


class WebSearchHelper:
    def __init__(self, kb_path="web_kb.index", meta_path="web_kb_meta.pkl"):
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
        self.kb_path = kb_path
        self.meta_path = meta_path
        self.meta = []
        self.index = None
        self._load_index()

    def _load_index(self):
        if os.path.exists(self.kb_path):
            self.index = faiss.read_index(self.kb_path)
            with open(self.meta_path, "rb") as f:
                self.meta = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)

    def _save_index(self):
        faiss.write_index(self.index, self.kb_path)
        with open(self.meta_path, "wb") as f:
            pickle.dump(self.meta, f)

    def search_duckduckgo(self, query, num=5):
        results = []
        q = quote(query)
        headers = {'User-Agent': 'Mozilla/5.0'}
        url = f"https://lite.duckduckgo.com/lite?q={q}"
        print(url)
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.text, "html.parser")
        links = soup.find_all("a", href=True)
        for link in links[:num]:
            parsed = urlparse(link['href'])
            if parsed.path.startswith("/l/"):
                # DuckDuckGo Lite wraps results in redirect links; the real URL is in the "uddg" parameter
                qs = parse_qs(parsed.query)
                actual_url = unquote(qs.get("uddg", [""])[0])
                if actual_url:
                    results.append(actual_url)
            # results.append(link['href'])
        return results

    def crawl_and_summarize(self, urls, llm_function):
        summaries = []
        for url in urls:
            try:
                print(f"[crawling] {url}")
                html = requests.get(url, timeout=5).text
                text = BeautifulSoup(html, "html.parser").get_text()
                clean = ' '.join(text.strip().split()[:1000])  # truncate to the first ~1000 words
                summary = llm_function(clean)
                summaries.append((url, summary))
            except Exception as e:
                print(f"[crawl error] {url} -> {e}")
        return summaries

    def add_to_kb(self, summaries):
        for url, content in summaries:
            vec = self.embedder.encode([content])
            self.index.add(vec)
            self.meta.append({"url": url, "summary": content})
        self._save_index()

    def query_kb(self, text, top_k=3):
        if self.index.ntotal == 0:
            return [], []
        vec = self.embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        results = [self.meta[i] for i in I[0] if 0 <= i < len(self.meta)]
        return D[0], results

    def should_trigger_search(self, score_threshold=0.7, text=""):
        if self.index.ntotal == 0:
            return True

        # IndexFlatL2 returns squared L2 distances (lower means a closer match)
        scores, _ = self.query_kb(text, top_k=1)
        if not scores or len(scores) == 0:
            return True

        return scores[0] > 1.0 or scores[0] < (1 - score_threshold)
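A minimal end-to-end sketch of the helper; the lambda is a stub summarizer standing in for LlmWrapper.summarize, and the search hits the live DuckDuckGo Lite endpoint:

from web_search_helper import WebSearchHelper

searcher = WebSearchHelper()
urls = searcher.search_duckduckgo("faiss similarity search", num=3)
summaries = searcher.crawl_and_summarize(urls, llm_function=lambda text: text[:200])  # stub summarizer
searcher.add_to_kb(summaries)
scores, hits = searcher.query_kb("vector similarity search")
for h in hits:
    print(h["url"], "->", h["summary"][:80])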