Asaad Almutareb committed
Commit: c30ce87
Parent(s): 2e6490e

cleaned code, updated requirements
Files changed:
- hf_mixtral_agent.py (+4, -29)
- innovation_pathfinder_ai/utils.py (+0, -42)
- requirements.txt (+3, -1)
hf_mixtral_agent.py CHANGED
@@ -1,15 +1,9 @@
 # HF libraries
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_core.prompts import ChatPromptTemplate
-from langchain import hub
-import gradio as gr
 from langchain.agents import AgentExecutor
 from langchain.agents.format_scratchpad import format_log_to_str
-from langchain.agents.output_parsers import (
-    ReActJsonSingleInputOutputParser,
-)
+from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
 # Import things that are needed generically
-from typing import List, Dict
 from langchain.tools.render import render_text_description
 import os
 from dotenv import load_dotenv
@@ -17,12 +11,11 @@ from innovation_pathfinder_ai.structured_tools.structured_tools import (
     arxiv_search, get_arxiv_paper, google_search, wikipedia_search
 )
 
-# hacky and should be replaced with a database
-from innovation_pathfinder_ai.source_container.container import (
-    all_sources
-)
 from langchain import PromptTemplate
 from innovation_pathfinder_ai.templates.react_json_with_memory import template_system
+from innovation_pathfinder_ai.utils import logger
+
+logger = logger.get_console_logger("hf_mixtral_agent")
 
 config = load_dotenv(".env")
 HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
@@ -49,13 +42,6 @@ tools = [
     # get_arxiv_paper,
 ]
 
-tools_papers = [
-    arxiv_search,
-    get_arxiv_paper,
-
-]
-
-
 prompt = PromptTemplate.from_template(
     template=template_system
 )
@@ -87,15 +73,4 @@ agent_executor = AgentExecutor(
     #max_execution_time=60, # timout at 60 sec
     return_intermediate_steps=True,
     handle_parsing_errors=True,
-)
-
-# instantiate AgentExecutor
-agent_executor_noweb = AgentExecutor(
-    agent=agent,
-    tools=tools_papers,
-    verbose=True,
-    max_iterations=6, # cap number of iterations
-    #max_execution_time=60, # timout at 60 sec
-    return_intermediate_steps=True,
-    handle_parsing_errors=True,
 )
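Note: the new import from innovation_pathfinder_ai.utils import logger, together with logger.get_console_logger("hf_mixtral_agent"), suggests the deleted utils.py (below) was superseded by a utils package containing a logger module. That module is not part of this diff; a minimal sketch of what get_console_logger might look like, assuming it wraps the standard logging module with a rich handler (rich is added to requirements.txt in this same commit):

# Hypothetical sketch of innovation_pathfinder_ai/utils/logger.py; the module path,
# defaults, and handler choice are assumptions, not part of this commit.
import logging

from rich.logging import RichHandler

def get_console_logger(name: str = "default") -> logging.Logger:
    """Return a logger that writes nicely formatted records to the console."""
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid stacking handlers on repeated calls
        logger.setLevel(logging.INFO)
        logger.addHandler(RichHandler(rich_tracebacks=True))
    return logger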
innovation_pathfinder_ai/utils.py DELETED
@@ -1,42 +0,0 @@
-def create_wikipedia_urls_from_text(text):
-    """
-    Extracts page titles from a given text and constructs Wikipedia URLs for each title.
-
-    Args:
-    - text (str): A string containing multiple sections, each starting with "Page:" followed by the title.
-
-    Returns:
-    - list: A list of Wikipedia URLs constructed from the extracted titles.
-    """
-    # Split the text into sections based on "Page:" prefix
-    sections = text.split("Page: ")
-    # Remove the first item if it's empty (in case the text starts with "Page:")
-    if sections[0].strip() == "":
-        sections = sections[1:]
-
-    urls = []  # Initialize an empty list to store the URLs
-    for section in sections:
-        # Extract the title, which is the string up to the first newline
-        title = section.split("\n", 1)[0]
-        # Replace spaces with underscores for the URL
-        url_title = title.replace(" ", "_")
-        # Construct the URL and add it to the list
-        url = f"https://en.wikipedia.org/wiki/{url_title}"
-        urls.append(url)
-
-    return urls
-
-def collect_urls(data_list):
-    urls = []
-    for item in data_list:
-        # Check if item is a string and contains 'link:'
-        if isinstance(item, str) and 'link:' in item:
-            start = item.find('link:') + len('link: ')
-            end = item.find(',', start)
-            url = item[start:end if end != -1 else None].strip()
-            urls.append(url)
-        # Check if item is a dictionary and has 'Entry ID'
-        elif isinstance(item, dict) and 'Entry ID' in item:
-            urls.append(item['Entry ID'])
-    last_sources = urls[-3:]
-    return last_sources
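For reference, the removed create_wikipedia_urls_from_text turned tool output of the form "Page: <title>" into Wikipedia links. A small usage sketch, assuming the deleted function above is still in scope and using an illustrative input string:

# Illustrative input; "Page: " is the prefix the removed helper splits on.
sample = "Page: Machine learning\nSummary: ...\nPage: Deep learning\nSummary: ..."

print(create_wikipedia_urls_from_text(sample))
# ['https://en.wikipedia.org/wiki/Machine_learning',
#  'https://en.wikipedia.org/wiki/Deep_learning']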
requirements.txt CHANGED
@@ -8,4 +8,6 @@ wikipedia
 gradio==3.48.0
 chromadb
 google_api_python_client
-pypdf2
+pypdf2
+sqlmodel
+rich
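The two new dependencies line up with the code changes: rich presumably backs the new console logger, and sqlmodel points toward a database-backed replacement for the removed in-memory all_sources container (whose old comment already called it "hacky and should be replaced with a database"). A minimal sketch of such a sources table; the table and field names are assumptions, not part of this commit:

# Hypothetical SQLModel-backed store for collected source URLs, for illustration only.
from typing import Optional

from sqlmodel import Field, Session, SQLModel, create_engine

class Source(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    url: str

engine = create_engine("sqlite:///sources.db")
SQLModel.metadata.create_all(engine)

def add_source(url: str) -> None:
    # Persist one source URL per call.
    with Session(engine) as session:
        session.add(Source(url=url))
        session.commit()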