rohith-yarramala
/

asyncapi-assistant-model-merged

conversational-ai

4-bit precision

Model card Files Files and versions

asyncapi-assistant-model-merged / app.py

rohith-yarramala's picture

rohith-yarramala

Update app.py

c76f995 verified about 1 year ago

history blame contribute delete

1.67 kB

	import torch
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# --- ✅ Load Model & Tokenizer ---
	MODEL_PATH = "rohith-yarramala/asyncapi-assistant-model-merged"

	# Everything runs on the CPU; no quantization arguments are passed below.
	device = "cpu"

	# Keyword arguments for the model load, gathered in one place.
	_model_load_options = {
	    "torch_dtype": torch.float32,  # load the weights as float32
	    "device_map": device,  # place the model on the device chosen above
	    "trust_remote_code": True,  # allow code shipped with the model repo to run
	    "low_cpu_mem_usage": True,  # request the lower-memory loading path
	}
	model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **_model_load_options)

	# The tokenizer is fetched from the same repository as the model.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

	# Use the end-of-sequence token id as the padding id in the model config.
	model.config.pad_token_id = tokenizer.eos_token_id

	print("✅ Model and tokenizer loaded successfully!")

	# --- 🚀 Define Chatbot Function ---
	def asyncapi_chatbot(question):
	    """Generate a text answer for a single AsyncAPI question.

	    Args:
	        question: Text entered by the user. ``None``, empty, or
	            whitespace-only input is answered with a short prompt
	            instead of being sent to the model.

	    Returns:
	        The decoded model output with special tokens removed. The
	        sequence returned by ``generate`` is decoded in full, so the
	        text may begin with the question itself.
	    """
	    if not question or not question.strip():
	        # Nothing to tokenize; skip the model call entirely.
	        return "Please enter a question."
	    inputs = tokenizer(question, return_tensors="pt").to(device)
	    output = model.generate(
	        **inputs,
	        # Budget 300 *generated* tokens. ``max_length`` also counts the
	        # prompt, so a long question would leave no room for an answer.
	        max_new_tokens=300,
	        # Pass the padding id explicitly rather than relying on the config.
	        pad_token_id=tokenizer.eos_token_id,
	        use_cache=False,
	    )
	    return tokenizer.decode(output[0], skip_special_tokens=True)

	# --- 🎨 Gradio UI ---
	css = """
	h1 { text-align: center; font-size: 28px; color: #4CAF50; }
	textarea { font-size: 16px; }
	"""

	# Input and output widgets for the single-question form.
	_question_box = gr.Textbox(
	    label="Ask an AsyncAPI Question",
	    placeholder="What is an AsyncAPI schema?",
	)
	_answer_box = gr.Textbox(label="AI Response")

	# Wire the chatbot function to the widgets; flagging is turned off.
	iface = gr.Interface(
	    fn=asyncapi_chatbot,
	    inputs=_question_box,
	    outputs=_answer_box,
	    title="AsyncAPI Assistant 🤖",
	    description="Ask any question about AsyncAPI, event-driven architecture, or message brokers.",
	    theme="compact",
	    allow_flagging="never",
	    css=css,
	)

	# Start the app with Gradio's default launch settings.
	iface.launch()