JVictor-CC committed on
Commit
e3edd55
1 Parent(s): e8afcbe

Add Model Class and requirements

Browse files

The Model class has 3 main methods:
- `download_model`, which takes a model URL from Hugging Face and downloads the model into a directory called 'model'.
- `load_local_model`, which loads the local model from the 'model' directory.
- `inference`, which takes a list of prompts and generates responses from the model.

Files changed (2) hide show
  1. Model.py +61 -0
  2. requirements.txt +5 -0
Model.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
class Model:
    """Download, load and query a Hugging Face causal language model.

    Typical usage is ``download_model()`` once, then ``load_local_model()``,
    then any number of ``inference()`` calls. ``set_cuda()`` / ``set_cpu()``
    choose the device the model is moved to by ``load_local_model()``.
    """

    # Directory holding the local copy of the model and tokenizer.
    DEFAULT_DIR = "model"

    def __init__(self, model_url) -> None:
        """Store the model identifier; nothing is downloaded or loaded yet.

        Args:
            model_url: Identifier handed to ``from_pretrained`` when the
                model is downloaded.
        """
        self.model_url = model_url
        self.tokenizer = None
        self.model = None
        self.device = "cpu"
        self.dir_name = None

    def download_model(self) -> bool:
        """Fetch the model and tokenizer and save them to the local directory.

        Returns:
            True when a download was performed, False when the local
            directory already exists and is not empty.
        """
        self.dir_name = self.DEFAULT_DIR
        if os.path.isdir(self.dir_name) and os.listdir(self.dir_name):
            print("Model is already downloaded and ready to use.")
            return False

        # exist_ok=True: the directory may already exist but be empty, in
        # which case a plain makedirs() would raise FileExistsError.
        os.makedirs(self.dir_name, exist_ok=True)

        tokenizer = AutoTokenizer.from_pretrained(self.model_url)
        model = AutoModelForCausalLM.from_pretrained(self.model_url)

        model.save_pretrained(self.dir_name)
        tokenizer.save_pretrained(self.dir_name)

        print(f"Model saved on '{self.dir_name}' directory.")
        return True

    def load_local_model(self):
        """Load the tokenizer and model from the local directory.

        The model is moved to the GPU only when ``self.device`` is "cuda"
        and CUDA is actually available; otherwise it stays where
        ``from_pretrained`` put it.
        """
        # download_model() may not have run in this process (e.g. the model
        # was saved by an earlier run), so fall back to the default location
        # instead of passing None to from_pretrained().
        if self.dir_name is None:
            self.dir_name = self.DEFAULT_DIR

        tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        model = AutoModelForCausalLM.from_pretrained(self.dir_name)

        if self.device == "cuda" and torch.cuda.is_available():
            model.to("cuda")

        self.model = model
        self.tokenizer = tokenizer

    def inference(self, prompt_list) -> "list | None":
        """Generate one response per prompt.

        Args:
            prompt_list: Iterable of prompt strings.

        Returns:
            A list with the decoded generation for each prompt, in order.
            When the model or tokenizer has not been loaded, a message is
            printed and None is returned.
        """
        if self.model is None or self.tokenizer is None:
            print("Model was not able to make inference, make sure you've loaded the model.")
            return None

        self.model.eval()

        # Place inputs on the device the model really lives on. self.device
        # is only the *requested* device: it can differ from the model's
        # device when set_cuda() is called after loading or when CUDA is
        # unavailable, and a mismatch makes generate() fail.
        target_device = getattr(self.model, "device", self.device)

        model_inferences = []
        for prompt in prompt_list:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(target_device)

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=inputs["input_ids"], max_new_tokens=512
                )

            # batch_decode accepts the generated ids directly; a single
            # prompt was submitted, so the first entry is its response.
            response = self.tokenizer.batch_decode(
                outputs, skip_special_tokens=True
            )[0]
            model_inferences.append(response)

        return model_inferences

    def set_cuda(self) -> str:
        """Request the GPU for the next ``load_local_model()`` call.

        Returns:
            The device name now stored in ``self.device``.
        """
        self.device = "cuda"
        return self.device

    def set_cpu(self) -> str:
        """Request the CPU for the next ``load_local_model()`` call.

        Returns:
            The device name now stored in ``self.device``.
        """
        self.device = "cpu"
        return self.device
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ gradio
3
+ transformers
4
+ huggingface-hub
5
+