anirudh248 committed on
Commit 108cdf7 · verified · 1 Parent(s): d2334dd

Update handler.py

Files changed (1): handler.py +22 -22
handler.py CHANGED
@@ -7,13 +7,10 @@ import torch
 
 class Handler:
     def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        print(f"Using device: {self.device}")
-
         # Load the fine-tuned model and tokenizer
         print("Loading model and tokenizer...")
-        self.model = AutoModelForCausalLM.from_pretrained("PranavKeshav/upf_code_generator").to(self.device)
-        self.tokenizer = AutoTokenizer.from_pretrained("PranavKeshav/upf_code_generator").to(self.device)
+        self.model = AutoModelForCausalLM.from_pretrained("anirudh248/upf_code_generator_final", device_map="auto")
+        self.tokenizer = AutoTokenizer.from_pretrained("anirudh248/upf_code_generator_final")
 
         # Load the FAISS index and embeddings
         print("Loading FAISS index and embeddings...")
@@ -22,37 +19,40 @@ class Handler:
 
         # Create the Hugging Face pipeline for text generation
         print("Creating Hugging Face pipeline...")
-
-        def run_inference(prompt: str):
-            # Assuming 2048 is the desired max sequence length
-            return self.model.generate(
-                prompt, temperature=0.7, max_length=2048, top_p=0.95, repetition_penalty=1.15
-            )
-
         self.hf_pipeline = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
+            device=0 if torch.cuda.is_available() else -1,
             temperature=0.7,
             max_new_tokens=2048,
             top_p=0.95,
             repetition_penalty=1.15
         )
-
-        self.hf_pipeline.model.generate = run_inference
+
         # Wrap the pipeline in LangChain
         self.llm = HuggingFacePipeline(pipeline=self.hf_pipeline)
 
-        # Create the retriever and pipeline
+        # Create the retriever and RetrievalQA chain
        self.retriever = self.vectorstore.as_retriever()
-        self.qa_chain = RetrievalQA.from_chain_type(llm=self.llm, retriever=self.retriever)
+        self.qa_chain = RetrievalQA.from_chain_type(
+            llm=self.llm,
+            retriever=self.retriever,
+            return_source_documents=False
+        )
 
     def __call__(self, request):
-        # Get the prompt from the request
-        prompt = request.json.get("prompt")
-
-        # Generate UPF code using the QA chain
-        response = self.qa_chain.run(prompt)
-
-        # Return the response
-        return {"response": response}
+        try:
+            # Get the prompt from the request
+            prompt = request.json.get("prompt")
+            if not prompt:
+                return {"error": "Prompt is required"}, 400
+
+            # Generate UPF code using the QA chain
+            response = self.qa_chain.run(prompt)
+
+            # Return the response
+            return {"response": response}
+
+        except Exception as e:
+            return {"error": str(e)}, 500