# kshama/timed_chat.py
# Saved: 2025-06-29 20:49:04 +06:00 · 42 lines · 1.3 KiB · Python
"""Interactive terminal chat with Llama-3.2-1B-Instruct, timing each reply."""
import time

load_start_time = time.time()

from transformers import pipeline

# Initialize the model. Loading is timed so the user sees startup cost.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype="auto",   # or torch.bfloat16 if your GPU supports it
    device_map="auto",
    pad_token_id=128001,  # same as eos_token_id for this model
)

# System prompt (optional)
SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."

print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
print("👋 Kshama is listening. Type 'exit' to quit.\n")

while True:
    try:
        user_input = input("🧑 You: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C: exit cleanly instead of dumping a traceback.
        print("\n👋 Goodbye!")
        break
    if user_input.strip().lower() == "exit":
        print("👋 Goodbye!")
        break

    # Pass a messages list so the pipeline applies the model's own chat
    # template. Llama 3.x does NOT use <|system|>/<|user|> pseudo-tags
    # (it uses <|start_header_id|>-style markers), so a hand-rolled prompt
    # string would be formatted wrong and degrade the model's replies.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]

    start_time = time.time()
    output = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
    elapsed = time.time() - start_time

    # With a messages-list input, generated_text is the full conversation;
    # the last entry is the assistant's reply.
    generated = output[0]["generated_text"]
    if isinstance(generated, list):
        response = generated[-1]["content"].strip()
    else:
        # Fallback for plain-string outputs (presumably older transformers
        # versions — NOTE(review): confirm against the installed version).
        response = str(generated).strip()

    print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")