Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,9 +18,14 @@ from llama_index.core import Document
|
|
18 |
from llama_index.core.retrievers import VectorIndexRetriever
|
19 |
from llama_index.core import QueryBundle
|
20 |
import time
|
|
|
21 |
|
22 |
nest_asyncio.apply()
|
23 |
hf_token = os.getenv('hf_token')
|
|
|
|
|
|
|
|
|
24 |
# quantize to save memory
|
25 |
quantization_config = BitsAndBytesConfig(
|
26 |
load_in_4bit=True,
|
@@ -37,12 +42,11 @@ llm = HuggingFaceLLM(
|
|
37 |
model_kwargs={"quantization_config": quantization_config},
|
38 |
generate_kwargs={"temperature": 0.1, "top_k": 50, "top_p": 0.95},
|
39 |
device_map="cuda:0",
|
40 |
-
|
41 |
)
|
42 |
|
43 |
embed_model = HuggingFaceEmbedding(
|
44 |
model_name="kheopss/kheops_embedding_e5_v3",
|
45 |
-
token = hf_token,
|
46 |
)
|
47 |
Settings.llm=llm
|
48 |
Settings.embed_model=embed_model
|
|
|
18 |
from llama_index.core.retrievers import VectorIndexRetriever
|
19 |
from llama_index.core import QueryBundle
|
20 |
import time
|
21 |
+
from huggingface_hub import login
|
22 |
|
23 |
nest_asyncio.apply()
|
24 |
hf_token = os.getenv('hf_token')
|
25 |
+
|
26 |
+
|
27 |
+
# Log in to the Hugging Face Hub with the token read from the 'hf_token' environment variable
|
28 |
+
login(token=hf_token)
|
29 |
# quantize to save memory
|
30 |
quantization_config = BitsAndBytesConfig(
|
31 |
load_in_4bit=True,
|
|
|
42 |
model_kwargs={"quantization_config": quantization_config},
|
43 |
generate_kwargs={"temperature": 0.1, "top_k": 50, "top_p": 0.95},
|
44 |
device_map="cuda:0",
|
45 |
+
|
46 |
)
|
47 |
|
48 |
embed_model = HuggingFaceEmbedding(
|
49 |
model_name="kheopss/kheops_embedding_e5_v3",
|
|
|
50 |
)
|
51 |
Settings.llm=llm
|
52 |
Settings.embed_model=embed_model
|