import time

# Start the clock before the transformers import, since the import itself
# accounts for a noticeable share of the startup time being measured.
load_start_time = time.time()

from transformers import pipeline

# Initialize your model
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype="auto",   # or torch.bfloat16 if your GPU supports it
    device_map="auto",
    pad_token_id=128001,  # <|end_of_text|>, same as the eos_token_id
)
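# (Note: device_map="auto" relies on the separate `accelerate` package being installed.)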

# System prompt (optional)
SYSTEM_PROMPT = "You are a helpful assistant. Keep responses brief and clear."

print(f"Time elapsed: {time.time() - load_start_time:.2f} seconds")
|
|
print("👋 Kshama is listening. Type 'exit' to quit.\n")
|
|
|
|
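# Simple REPL: read a line, generate a reply, print it, repeat until "exit".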
while True:
    user_input = input("🧑 You: ")
    if user_input.strip().lower() == "exit":
        print("👋 Goodbye!")
        break

prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_input}\n<|assistant|>\n"
|
|
|
|
    start_time = time.time()
    output = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    elapsed = time.time() - start_time

    # generated_text echoes the prompt by default, so strip it back out.
    # (Passing return_full_text=False to the pipeline call would avoid this.)
    if isinstance(output[0], dict) and "generated_text" in output[0]:
        response = output[0]["generated_text"].replace(prompt, "").strip()
    elif isinstance(output[0], str):
        response = output[0].replace(prompt, "").strip()
    else:
        response = str(output[0])

print(f"🤖 ক্ষমা [{elapsed:.2f}s]: {response}\n")
|
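
# A minimal alternative sketch (not tested here): recent transformers pipelines
# accept chat messages directly and apply the model's own chat template, which
# avoids hand-rolling the <|system|>/<|user|> tags above:
#
#   messages = [
#       {"role": "system", "content": SYSTEM_PROMPT},
#       {"role": "user", "content": user_input},
#   ]
#   output = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
#   response = output[0]["generated_text"][-1]["content"]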