Initial model codes
Commit 4b5921d829
.gitignore (vendored, Normal file, 4 lines added)
@@ -0,0 +1,4 @@
.venv/
*.index
*.pkl
.idea/
agent.py (Normal file, 108 lines added)
@@ -0,0 +1,108 @@
import time
begin_time = time.time()

import torch
from transformers import pipeline, AutoTokenizer
from memory import Memory
from web_search_helper import WebSearchHelper
from llm_wrapper import LlmWrapper

# Initialize components
memory = Memory()
searcher = WebSearchHelper()
summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B")  # optional, could summarize search results

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
# Load the main LLM (Llama 3.2 1B Instruct)
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Define system prompt and Kshama's capabilities
SYSTEM_PROMPT = """
You are ক্ষমা, Abu's personal AI assistant. You're helpful, respectful, and aligned with his goals and preferences.
You maintain and query a persistent memory of past interactions and facts via a vector store.
You can:
1. Recall relevant knowledge from memory using semantic similarity.
2. Add new insights to memory when useful.
3. Perform live web searches and summarize results if memory is insufficient.
Structure your outputs clearly:
- Use ##MEM:add(...) to store thoughts to memory.
- Use ##MEM:recall(...) to request a lookup (already handled externally).
- Use ##SEARCH:trigger(...) when memory lacks the answer.
Respond in a clear, friendly tone. Actively use what you know about Abu's past work (e.g., GANs, TensorFlow, Exopid).
"""


def generate_response(user_input: str):
    # Step 1: Recall relevant memory
    recalled = memory.query(user_input, top_k=3)
    memory_context = "\n".join([f"- {item}" for item in recalled])

    # Step 2: Evaluate recall quality and search the web if needed
    should_search = searcher.should_trigger_search(text=user_input)
    kb_hits = ""
    if should_search:
        urls = searcher.search_duckduckgo(user_input)
        summaries = searcher.crawl_and_summarize(urls, llm_function=summarizer.summarize)
        searcher.add_to_kb(summaries)
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])
    else:
        _, hits = searcher.query_kb(user_input)
        kb_hits = "\n".join([f"- {h['summary']}" for h in hits])

    # Step 3: Compose structured messages
    context_block = f"""Known facts from memory:
{memory_context or '[None]'}

External knowledge from web:
{kb_hits or '[None]'}
"""

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{context_block}\nUser asked: {user_input}"},
    ]

    # Convert using the chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # appends the assistant tag if needed
    )

    # Step 4: Call the model
    output = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the newly generated text, not the echoed prompt
    )

    # Step 5: Process model output (add to memory if marked)
    response = output[0]["generated_text"].strip()

    if "##MEM:add(" in response:
        try:
            content = response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
            memory.add(content)
            print("[✅ Memory Added]")
        except Exception:
            print("[⚠️ Couldn't parse memory add]")

    return response


# 💬 REPL for testing
if __name__ == "__main__":
    print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")
    print("👋 Welcome to Kshama. Type 'exit' to leave.")
    while True:
        user_input = input("\n🧑 You: ")
        if user_input.strip().lower() in ["exit", "quit"]:
            break
        response = generate_response(user_input)
        print(f"\n🤖 ক্ষমা: {response}")
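For reference, a minimal standalone sketch of how the ##MEM:add(...) marker described in SYSTEM_PROMPT gets parsed out of a reply; the sample reply string is invented for illustration:

# Hypothetical reply; real replies come from the Llama pipeline in agent.py.
sample_response = 'Noted! ##MEM:add("Abu prefers bfloat16 on his RTX 3050") I will remember that.'
if "##MEM:add(" in sample_response:
    # Same parsing as agent.py: take the text between the marker and the first closing parenthesis
    content = sample_response.split("##MEM:add(")[1].split(")")[0].strip('"\'')
    print(content)  # Abu prefers bfloat16 on his RTX 3050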
llama-3.2.py (Normal file, 26 lines added)
@@ -0,0 +1,26 @@
import time
begin_time = time.time()

import torch
from transformers import pipeline

model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
outputs = pipe(
    messages,
    max_new_tokens=256,
)

print(f"Time elapsed: {time.time() - begin_time:.2f} seconds")

# print(outputs[0]["generated_text"][-1])
print(outputs)
llm_wrapper.py (Normal file, 21 lines added)
@@ -0,0 +1,21 @@
from transformers import pipeline


class LlmWrapper:
    def __init__(self, model_name="Qwen/Qwen3-0.6B", max_new_tokens=256):
        self.model_name = model_name
        self.pipe = pipeline("text-generation", model=model_name)
        self.max_tokens = max_new_tokens

    def summarize(self, text: str, prompt_template=None) -> str:
        # Default to a lightweight summarization instruction
        prompt = (
            prompt_template or
            f"Summarize the following content briefly:\n\n{text.strip()}\n\nSummary:"
        )

        messages = [{"role": "user", "content": prompt}]
        try:
            outputs = self.pipe(messages, max_new_tokens=self.max_tokens)
            # With chat-style input, generated_text is the message list; the last entry is the assistant reply
            return outputs[0]["generated_text"][-1]["content"].strip()
        except Exception as e:
            return f"[LLM ERROR]: {e}"
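A minimal usage sketch of the wrapper (the input text is invented; the Qwen model downloads on first run):

from llm_wrapper import LlmWrapper

summarizer = LlmWrapper(model_name="Qwen/Qwen3-0.6B", max_new_tokens=128)
sample_text = "FAISS is a library for efficient similarity search over dense vectors. " * 20
print(summarizer.summarize(sample_text))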
memory.py (Normal file, 39 lines added)
@@ -0,0 +1,39 @@
import faiss
import os
import pickle
from sentence_transformers import SentenceTransformer

# Embedder: a small model is fast & good enough
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # Replaceable


class Memory:
    def __init__(self, index_path="kshama.index", metadata_path="memory_meta.pkl"):
        self.index_path = index_path
        self.metadata_path = metadata_path
        self.index = None
        self.metadata = []
        self._load()

    def _load(self):
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            with open(self.metadata_path, "rb") as f:
                self.metadata = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)  # Depends on embedder output dim

    def add(self, text, tags=None):
        vec = embedder.encode([text])
        self.index.add(vec)
        self.metadata.append({"text": text, "tags": tags or []})
        self._save()

    def query(self, text, top_k=5):
        if self.index.ntotal == 0:
            return []  # nothing stored yet; avoid searching an empty index
        vec = embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        # FAISS pads missing results with -1 when fewer than top_k vectors exist
        return [self.metadata[i]["text"] for i in I[0] if 0 <= i < len(self.metadata)]

    def _save(self):
        faiss.write_index(self.index, self.index_path)
        with open(self.metadata_path, "wb") as f:
            pickle.dump(self.metadata, f)
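The hard-coded 384 matches all-MiniLM-L6-v2; a small sketch, assuming the embedder may later be swapped, of deriving the dimension from the model instead:

from sentence_transformers import SentenceTransformer
import faiss

embedder = SentenceTransformer("all-MiniLM-L6-v2")
dim = embedder.get_sentence_embedding_dimension()  # 384 for all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dim)  # stays in sync if the embedder changes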
minimax-m1.py (Normal file, 10 lines added)
@@ -0,0 +1,10 @@
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
output = pipe(messages)

print(output)
qwen.py (Normal file, 9 lines added)
@@ -0,0 +1,9 @@
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
output = pipe(messages)
print(output[0]["generated_text"][-1])
test_memory.py (Normal file, 18 lines added)
@@ -0,0 +1,18 @@
from memory import Memory

# Initialize the memory store
memory = Memory()

# Add some sample thoughts
memory.add("Abu prefers concise Bash scripts for automating Exopid backups.", tags=["Abu", "preference"])
memory.add("Kshama is the name of Abu's AI agent.", tags=["identity", "name"])
memory.add("Abu is exploring GANs and TensorFlow with RTX 3050.", tags=["tech", "gpu", "project"])

# Query the memory
query = "What tools is Abu experimenting with?"
results = memory.query(query)

# Display results
print("\n🔎 Query Results:")
for i, entry in enumerate(results, 1):
    print(f"{i}. {entry}")
timed_chat.py (Normal file, 41 lines added)
@@ -0,0 +1,41 @@
import time
load_start_time = time.time()

from transformers import pipeline

# Initialize the model
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype="auto",  # or torch.bfloat16 if your GPU supports it
    device_map="auto",
    pad_token_id=128001  # same as eos_token_id
)

# System prompt (optional)
SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."

print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
print("👋 Kshama is listening. Type 'exit' to quit.\n")

while True:
    user_input = input("🧑 You: ")
    if user_input.strip().lower() == "exit":
        print("👋 Goodbye!")
        break

    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"

    start_time = time.time()
    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    elapsed = time.time() - start_time

    if isinstance(output[0], dict) and "generated_text" in output[0]:
        response = output[0]["generated_text"].replace(prompt, "").strip()
    elif isinstance(output[0], str):
        response = output[0].replace(prompt, "").strip()
    else:
        response = str(output[0])

    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")
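The <|system|>/<|user|> tags above are an ad-hoc prompt format rather than Llama 3.2's own chat template; a sketch of building the prompt with the tokenizer's template instead, as agent.py already does (sample strings stand in for SYSTEM_PROMPT and user_input):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
messages = [
    {"role": "system", "content": "You are a helpful assistant. Keep responses brief and clear."},
    {"role": "user", "content": "Hello"},
]
# Emits Llama 3.x's native header tokens instead of the ad-hoc tags
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)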
web_search_helper.py (Normal file, 89 lines added)
@@ -0,0 +1,89 @@
import requests
import faiss
import pickle
import os
from bs4 import BeautifulSoup
from urllib.parse import quote
from sentence_transformers import SentenceTransformer
from urllib.parse import parse_qs, urlparse, unquote


class WebSearchHelper:
    def __init__(self, kb_path="web_kb.index", meta_path="web_kb_meta.pkl"):
        self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
        self.kb_path = kb_path
        self.meta_path = meta_path
        self.meta = []
        self.index = None
        self._load_index()

    def _load_index(self):
        if os.path.exists(self.kb_path):
            self.index = faiss.read_index(self.kb_path)
            with open(self.meta_path, "rb") as f:
                self.meta = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(384)

    def _save_index(self):
        faiss.write_index(self.index, self.kb_path)
        with open(self.meta_path, "wb") as f:
            pickle.dump(self.meta, f)

    def search_duckduckgo(self, query, num=5):
        results = []
        q = quote(query)
        headers = {'User-Agent': 'Mozilla/5.0'}
        url = f"https://lite.duckduckgo.com/lite?q={q}"
        print(url)
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.text, "html.parser")
        links = soup.find_all("a", href=True)
        for link in links[:num]:
            parsed = urlparse(link['href'])
            if parsed.path.startswith("/l/"):
                # DuckDuckGo Lite wraps results in redirect links; the real URL is in the "uddg" parameter
                qs = parse_qs(parsed.query)
                actual_url = unquote(qs.get("uddg", [""])[0])
                if actual_url:
                    results.append(actual_url)
            # results.append(link['href'])
        return results

    def crawl_and_summarize(self, urls, llm_function):
        summaries = []
        for url in urls:
            try:
                print(f"[crawling] {url}")
                html = requests.get(url, timeout=5).text
                text = BeautifulSoup(html, "html.parser").get_text()
                clean = ' '.join(text.strip().split()[:1000])  # truncate to the first ~1000 words
                summary = llm_function(clean)
                summaries.append((url, summary))
            except Exception as e:
                print(f"[crawl error] {url} -> {e}")
        return summaries

    def add_to_kb(self, summaries):
        for url, content in summaries:
            vec = self.embedder.encode([content])
            self.index.add(vec)
            self.meta.append({"url": url, "summary": content})
        self._save_index()

    def query_kb(self, text, top_k=3):
        if self.index.ntotal == 0:
            return [], []
        vec = self.embedder.encode([text])
        D, I = self.index.search(vec, top_k)
        results = [self.meta[i] for i in I[0] if 0 <= i < len(self.meta)]
        return D[0], results

    def should_trigger_search(self, score_threshold=0.7, text=""):
        if self.index.ntotal == 0:
            return True

        # IndexFlatL2 returns squared L2 distances (lower means a closer match)
        scores, _ = self.query_kb(text, top_k=1)
        if not scores or len(scores) == 0:
            return True

        return scores[0] > 1.0 or scores[0] < (1 - score_threshold)
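A minimal end-to-end sketch of the helper; the lambda is a stub summarizer standing in for LlmWrapper.summarize, and the search hits the live DuckDuckGo Lite endpoint:

from web_search_helper import WebSearchHelper

searcher = WebSearchHelper()
urls = searcher.search_duckduckgo("faiss similarity search", num=3)
summaries = searcher.crawl_and_summarize(urls, llm_function=lambda text: text[:200])  # stub summarizer
searcher.add_to_kb(summaries)
scores, hits = searcher.query_kb("vector similarity search")
for h in hits:
    print(h["url"], "->", h["summary"][:80])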