marcelo-castro-cardoso committed
Commit afc8094
1 Parent(s): a7ab009
Files changed (2)
  1. app.py +27 -14
  2. requirements.txt +2 -2
app.py CHANGED
@@ -1,5 +1,7 @@
 import gradio as gr
 
+import os
+
 from pathlib import Path
 import torch
 from transformers import pipeline
@@ -9,6 +11,8 @@ from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
 from llama_index.node_parser import SentenceSplitter
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 
+from llama_index.llms import HuggingFaceLLM
+
 from llama_index.embeddings import LangchainEmbedding
 
 INPUT_FOLDER = "./data"
@@ -19,28 +23,37 @@ max_input_size = 2048
 num_output = 256
 max_chunk_overlap = 20
 max_prompt_chunk_overlap = 0.5
-prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)
 
-pipe = pipeline("text-generation", model="databricks/dolly-v2-3b", trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
+# creation of a HuggingFace LLM in the llamaindex framework
+llm = HuggingFaceLLM(
+    tokenizer_name="tiiuae/falcon-7b-instruct",
+    model_name="tiiuae/falcon-7b-instruct",
+    device_map="auto",
+    model_kwargs={"max_length": 64, "offload_folder": "cached", "torch_dtype": torch.float16}
+)
+
+# prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)
+
+# pipe = pipeline("text-generation", model="databricks/dolly-v2-3b", trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
 embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
 
-class CustomLLM(LLM):
-    model_name = "databricks/dolly-v2-3b"
+# class CustomLLM(LLM):
+#     model_name = "databricks/dolly-v2-3b"
 
-    def _call(self, prompt, stop = None):
-        response = pipe(prompt, max_new_tokens=num_output)[0]["generated_text"]
-        return response
+#     def _call(self, prompt, stop = None):
+#         response = pipe(prompt, max_new_tokens=num_output)[0]["generated_text"]
+#         return response
 
-    @property
-    def _identifying_params(self):
-        return {"name_of_model": self.model_name}
+#     @property
+#     def _identifying_params(self):
+#         return {"name_of_model": self.model_name}
 
-    @property
-    def _llm_type(self):
-        return "custom"
+#     @property
+#     def _llm_type(self):
+#         return "custom"
 
 # define our LLM
-llm_predictor = LLMPredictor(llm=CustomLLM())
+llm_predictor = LLMPredictor(llm=llm)
 
 node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=max_chunk_overlap)
 prompt_helper = PromptHelper(max_input_size, num_output, max_prompt_chunk_overlap)
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 gradio
-langchain
-llama-index
+langchain==0.0.348
+llama-index==0.9.26
 transformers
 torch
 accelerate