Initial model codes

Committed by sufian on 2025-06-29 20:49:04 +06:00
commit 4b5921d829
10 changed files with 365 additions and 0 deletions

.gitignore vendored Normal file (4 additions)

@@ -0,0 +1,4 @@
.venv/
*.index
*.pkl
.idea/

agent.py Normal file (108 additions)

@@ -0,0 +1,108 @@
import time
begin_time = time.time()
import torch
from transformers import pipeline, AutoTokenizer
from memory import Memory
from web_search_helper import WebSearchHelper
from llm_wrapper import LlmWrapper
# Initialize components
memory = Memory()
searcher = WebSearchHelper()
summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B") # optional, could summarize search results
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
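# The tokenizer is loaded separately so the model's chat template can be applied to the composed messages (Step 3).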
# Load your main LLM (Llama 3.2:1B-Instruct)
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
"text-generation",
model=model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
)
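# Note: device_map="auto" requires the accelerate package; bfloat16 assumes hardware that supports it.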
# Define system prompt and Kshama's capabilities
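# Note: of the directives below, only ##MEM:add(...) is parsed from the model's output (Step 5);
# memory recall and web search are performed proactively in generate_response(), not on a model directive.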
SYSTEM_PROMPT = """
You are ক্ষমা (Kshama), Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
You maintain and query a persistent memory of past interactions and facts via a vector store.
You can:
1. Recall relevant knowledge from memory using semantic similarity.
2. Add new insights to memory when useful.
3. Perform live web searches and summarize results if memory is insufficient.
Structure your outputs clearly:
- Use ##MEM:add(...) to store thoughts to memory.
- Use ##MEM:recall(...) to request a lookup (already handled externally).
- Use ##SEARCH:trigger(...) when memory lacks the answer.
Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
"""
def generate_response(user_input: str):
    # Step 1: Recall relevant memory
    recalled = memory.query(user_input, top_k=3)
    memory_context = "\n".join([f"- {item}" for item in recalled])
    # Step 2: Evaluate recall quality
    should_search = searcher.should_trigger_search(text=user_input)
    kb_hits = ""
    if should_search:
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize)
        searcher.add_to_kb(summaries)
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])
    else:
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])
    # Step 3: Compose structured messages
    context_block = f"""Known facts from memory:
{memory_context or '[None]'}
External knowledge from web:
{kb_hits or '[None]'}
"""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"},
    ]
    # Convert using chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # appends assistant tag if needed
    )
    # Step 4: Call the model
    output = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the completion, not the echoed prompt
    )
    # Step 5: Process model output (add to memory if marked)
    response = output[0]["generated_text"].strip()
    if "##MEM:add(" in response:
        try:
            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
            memory.add(content)
            print("[✅ Memory Added]")
        except Exception:
            print("[⚠️ Couldn't parse memory add]")
    return response
# 💬 REPL for testing
if __name__ == "__main__":
    print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
    print("👋 Welcome to Kshama. Type 'exit' to leave.")
    while True:
        user_input = input("\n🧑 You: ")
        if user_input.strip().lower() in ["exit", "quit"]: break
        response = generate_response(user_input)
        print(f"\n🤖 ক্ষমা: {response}")

llama-3.2.py Normal file (26 additions)

@@ -0,0 +1,26 @@
import time
begin_time = time.time()
import torch
from transformers import pipeline
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
"text-generation",
model=model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
)
messages = [
{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
{"role": "user", "content": "Who are you?"},
]
outputs = pipe(
messages,
max_new_tokens=256,
)
print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
#print(outputs[0]["generated_text"][-1])
print(outputs)
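# outputs[0]["generated_text"] holds the full chat (input messages plus the generated assistant reply),
# so the commented-out line above would print just the assistant's message.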

llm_wrapper.py Normal file (21 additions)

@@ -0,0 +1,21 @@
from transformers import pipeline
class LlmWrapper:
    def __init__(self, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
        self.model_name = model_name
        self.pipe = pipeline("text-generation", model=model_name)
        self.max_tokens = max_new_tokens
    def summarize(self, text: str, prompt_template=None) -> str:
        # Default to a lightweight summarization instruction
        prompt = (
            prompt_template or
            f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
        )
        messages = [{"role": "user", "content": prompt}]
        try:
            outputs = self.pipe(messages, max_new_tokens=self.max_tokens)
            # With chat-style input, generated_text is the message list; the last entry is the reply
            return outputs[0]["generated_text"][-1]["content"].strip()
        except Exception as e:
            return f"[LLM ERROR]: {e}"

memory.py Normal file (39 additions)

@@ -0,0 +1,39 @@
import faiss
import os
import pickle
from sentence_transformers import SentenceTransformer
# Embedder: small model is fast & good enough
embedder = SentenceTransformer("all-MiniLM-L6-v2") # Replaceable
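# all-MiniLM-L6-v2 outputs 384-dimensional embeddings; the FAISS index below must use the same dimension.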
class Memory:
    def __init__(self, index_path="kshama.index", metadata_path="memory_meta.pkl"):
        self.index_path = index_path
        self.metadata_path = metadata_path
        self.index = None
        self.metadata = []
        self._load()
    def _load(self):
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            with open(self.metadata_path, "rb") as f:
                self.metadata = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)  # Depends on embedder output dim
    def add(self, text, tags=None):
        vec = embedder.encode([text])
        self.index.add(vec)
        self.metadata.append({"text": text, "tags": tags or []})
        self._save()
    def query(self, text, top_k=5):
        if self.index.ntotal == 0:
            return []
        vec = embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        # FAISS pads missing results with -1, so keep only valid indices
        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]
    def _save(self):
        faiss.write_index(self.index, self.index_path)
        with open(self.metadata_path, "wb") as f:
            pickle.dump(self.metadata, f)

minimax-m1.py Normal file (10 additions)

@@ -0,0 +1,10 @@
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-generation", model="MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True)
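# trust_remote_code=True executes custom modeling code shipped with the model repo; enable it only for sources you trust.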
messages = [
{"role": "user", "content": "Who are you?"},
]
output = pipe(messages)
print(output)

qwen.py Normal file (9 additions)

@@ -0,0 +1,9 @@
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")
messages = [
{"role": "user", "content": "Who are you?"},
]
output = pipe(messages)
print(output[0]["generated_text"][-1])

test_memory.py Normal file (18 additions)

@@ -0,0 +1,18 @@
from memory import Memory
# Initialize the memory store
memory = Memory()
# Add some sample thoughts
memory.add("Abu prefers concise Bash scripts for automating Exopid backups.", tags=["Abu", "preference"])
memory.add("Kshama is the name of Abu's AI agent.", tags=["identity", "name"])
memory.add("Abu is exploring GANs and TensorFlow with RTX 3050.", tags=["tech", "gpu", "project"])
# Query the memory
query = "What tools is Abu experimenting with?"
results = memory.query(query)
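# memory.query defaults to top_k=5, so all three stored entries are returned, nearest first.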
# Display results
print("\n🔎 Query Results:")
for i, entry in enumerate(results, 1):
    print(f"{i}. {entry}")

timed_chat.py Normal file (41 additions)

@@ -0,0 +1,41 @@
import time
load_start_time = time.time()
from transformers import pipeline
# Initialize your model
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
"text-generation",
model=model_id,
torch_dtype="auto", # or torch.bfloat16 if your GPU supports it
device_map="auto",
pad_token_id=128001 # same as eos_token_id
)
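# Explicitly setting pad_token_id avoids the warning about it being unset during generation.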
# System prompt (optional)
SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."
print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
print("👋 Kshama is listening. Type 'exit' to quit.\n")
while True:
    user_input = input("🧑 You: ")
    if user_input.strip().lower() == "exit":
        print("👋 Goodbye!")
        break
    # NOTE: hand-rolled prompt tags, not the official Llama 3 chat template
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"
    start_time = time.time()
    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    elapsed = time.time() - start_time
    if isinstance(output[0], dict) and "generated_text" in output[0]:
        response = output[0]["generated_text"].replace(prompt, "").strip()
    elif isinstance(output[0], str):
        response = output[0].replace(prompt, "").strip()
    else:
        response = str(output[0])
    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")

web_search_helper.py Normal file (89 additions)

@@ -0,0 +1,89 @@
import requests
import faiss
import pickle
import os
from bs4 import BeautifulSoup
from urllib.parse import quote
from sentence_transformers import SentenceTransformer
from urllib.parse import parse_qs, urlparse, unquote
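# Keeps a small FAISS-backed knowledge base of web-page summaries, separate from the agent's long-term memory (memory.py).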
class WebSearchHelper:
    def __init__(self, kb_path="web_kb.index", meta_path="web_kb_meta.pkl"):
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
        self.kb_path = kb_path
        self.meta_path = meta_path
        self.meta = []
        self.index = None
        self._load_index()
    def _load_index(self):
        if os.path.exists(self.kb_path):
            self.index = faiss.read_index(self.kb_path)
            with open(self.meta_path, "rb") as f:
                self.meta = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)
    def _save_index(self):
        faiss.write_index(self.index, self.kb_path)
        with open(self.meta_path, "wb") as f:
            pickle.dump(self.meta, f)
    def search_duckduckgo(self, query, num=5):
        results = []
        q = quote(query)
        headers = {'User-Agent': 'Mozilla/5.0'}
        url = f"https://lite.duckduckgo.com/lite?q={q}"
        print(url)
        res = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(res.text, "html.parser")
        links = soup.find_all("a", href=True)
        for link in links[:num]:
            # DuckDuckGo Lite wraps results in /l/?uddg=<encoded target URL> redirect links
            parsed = urlparse(link['href'])
            if parsed.path.startswith("/l/"):
                qs = parse_qs(parsed.query)
                actual_url = unquote(qs.get("uddg", [""])[0])
                if actual_url:
                    results.append(actual_url)
                #results.append(link['href'])
        return results
    def crawl_and_summarize(self, urls, llm_function):
        summaries = []
        for url in urls:
            try:
                print(f"[crawling] {url}")
                html = requests.get(url, timeout=5).text
                text = BeautifulSoup(html, "html.parser").get_text()
                clean = ' '.join(text.strip().split()[:1000])  # truncate
                summary = llm_function(clean)
                summaries.append((url, summary))
            except Exception as e:
                print(f"[crawl error] {url} -> {e}")
        return summaries
    def add_to_kb(self, summaries):
        for url, content in summaries:
            vec = self.embedder.encode([content])
            self.index.add(vec)
            self.meta.append({"url": url, "summary": content})
        self._save_index()
    def query_kb(self, text, top_k=3):
        if self.index.ntotal == 0:
            return [], []
        vec = self.embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        results = [self.meta[i] for i in I[0] if 0 <= i < len(self.meta)]
        return D[0], results
    def should_trigger_search(self, score_threshold=0.7, text=""):
        if self.index.ntotal == 0:
            return True
        scores, _ = self.query_kb(text, top_k=1)
        if len(scores) == 0:
            return True
        # L2 distance: lower means closer; search when the best hit is far (> 1.0)
        # or implausibly close to zero (< 1 - score_threshold)
        return scores[0] > 1.0 or scores[0] < (1 - score_threshold)