Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#Parent directory
|
2 |
+
import sys
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
import pandas as pd # for data manipulation (pip install pandas)
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
from random import randint
|
8 |
+
from urllib.parse import urlparse
|
9 |
+
import numpy as np
|
10 |
+
from langchain.chat_models import ChatOpenAI
|
11 |
+
from langchain.chains import create_extraction_chain
|
12 |
+
from langchain.llms import OpenAI
|
13 |
+
from langchain.chat_models import ChatOpenAI
|
14 |
+
from typing import Optional
|
15 |
+
from langchain.chains.openai_functions import (
|
16 |
+
create_structured_output_chain, create_tagging_chain_pydantic
|
17 |
+
)
|
18 |
+
from langchain.prompts import ChatPromptTemplate
|
19 |
+
import gradio as gr
|
20 |
+
from collections import defaultdict
|
21 |
+
|
22 |
+
# Schema
# Shape of one extracted record handed to the extraction chain: every listed
# field is a string and every field is mandatory.
_SCHEMA_FIELDS = ("keyword", "category")

schema = {
    # A fresh {"type": "string"} dict per field, so the entries never alias.
    "properties": {field: {"type": "string"} for field in _SCHEMA_FIELDS},
    "required": list(_SCHEMA_FIELDS),
}
|
30 |
+
|
31 |
+
# Input
# Chat prompt sent for every batch. The three placeholders ({prompt_input},
# {categories}, {keywords}) are filled in by the caller of the chain.
_system_message = (
    "system",
    "You are an expert marketing researcher specialized in the finance industry",
)
_task_message = (
    "human",
    """{prompt_input}.
Here you have the categories splitted by coma: {categories}.
and Here you have the keywords splitted by coma: {keywords}.""",
)
_reminder_message = (
    "human",
    "Tip: Make sure to answer in the correct format and DO NOT leave keywords without category and DO NOT skip keywords. Please categorize all the keywords that I give you, each keyword must have just one and only one category.",
)
prompt = ChatPromptTemplate.from_messages(
    [_system_message, _task_message, _reminder_message]
)

# The API key is read from the environment at import time; a missing
# 'OpenAI_APIKEY' variable raises KeyError here.
_openai_api_key = os.environ['OpenAI_APIKEY']
llm = ChatOpenAI(
    temperature=0,
    openai_api_key=_openai_api_key,
    model="gpt-3.5-turbo",
)

# Extraction chain that combines the record schema, the model and the prompt.
chain = create_extraction_chain(schema, llm, prompt, verbose=1)
|
44 |
+
|
45 |
+
def _read_first_column(uploaded_file):
    """Return the first column of an uploaded CSV/Excel file as a list of strings.

    The file is first parsed as CSV; if that fails for any reason it is
    parsed as an Excel workbook instead. Blank cells are dropped and the
    remaining values are converted to ``str`` so they can be joined safely.
    """
    path = uploaded_file.name
    try:
        frame = pd.read_csv(path)
    except Exception:
        # Not parseable as CSV (for example an .xlsx upload): try Excel.
        frame = pd.read_excel(path)
    # Blank cells load as NaN floats and numeric cells as numbers; both would
    # make ','.join() raise TypeError, so normalise everything to str here.
    return frame.iloc[:, 0].dropna().astype(str).tolist()


def run_chain(input_prompt, keywords_file, categories_file, batch_size=50):
    """Categorize the uploaded keywords in batches and save the results.

    Args:
        input_prompt: Instruction text substituted into the prompt's
            ``prompt_input`` placeholder.
        keywords_file: Uploaded file object exposing ``.name`` (a path to a
            CSV or Excel file); keywords are read from its first column.
        categories_file: Same as ``keywords_file``, for the categories.
        batch_size: Number of keywords sent to the chain per call.

    Returns:
        A tuple ``(results, 'themes_results.csv')`` where ``results`` is the
        concatenation of whatever each successful ``chain.run`` call
        returned (presumably a list of keyword/category dicts -- confirm
        against the chain's output), and the second item is the path of the
        CSV written by ``results_to_csv``.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # A non-positive step would never advance through the keywords.
        raise ValueError("batch_size must be a positive integer")

    keywords = _read_first_column(keywords_file)
    categories = _read_first_column(categories_file)
    # The category list is identical for every batch, so join it once.
    joined_categories = ','.join(categories)

    results = []
    for index in range(0, len(keywords), batch_size):
        # Slicing past the end of a list is safe, so no guard is needed.
        batch = keywords[index:index + batch_size]
        try:
            result = chain.run({
                'prompt_input': input_prompt,
                'categories': joined_categories,
                'keywords': ','.join(batch),
            })
        except Exception as E:
            # Best effort: report the failed batch and carry on with the rest.
            print('this batch did not worked from {} to {}'.format(index, index + batch_size))
            print(E)
            result = []
        results += result

    results_to_csv(results)
    return results, 'themes_results.csv'
|
75 |
+
|
76 |
+
def results_to_csv(results, path='themes_results.csv'):
    """Write the extracted records to a CSV file, one row per record.

    Args:
        results: Iterable of dicts (one per record). Records may have
            different keys; a key missing from a record becomes an empty
            cell in that row instead of aborting the export.
        path: Destination file. Defaults to ``'themes_results.csv'`` in the
            current working directory.
    """
    # Building the frame from the records lets pandas align the columns.
    # Pivoting into per-key lists by hand yields lists of unequal length as
    # soon as one record lacks a key, which makes the DataFrame constructor
    # raise and loses the whole export.
    pd.DataFrame(list(results)).to_csv(path, index=False)
|
82 |
+
|
83 |
+
|
84 |
+
# Gradio UI: an editable instruction text, two uploads (keywords and
# categories), a run button, and the text/file outputs of run_chain.
with gr.Blocks() as demo:
    prompt_input = gr.Text("""I need your help to analyze and categorize the provided list of keywords
into the appropriate categories.
The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.
Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.""")
    gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
    # Extensions need the leading dot, matching the categories upload below.
    keywords_file = gr.File(file_types=['.csv', '.xlsx'], label='keywords')
    gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the categories in one column. Should have a header")
    categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')
    with gr.Accordion("Open for More!"):
        gr.Markdown("Look at me...")

    btn = gr.Button(value="run")
    txt_3 = gr.Textbox(value="", label="Output")
    output_file = gr.File(label="Output File",
                          file_count="single",
                          file_types=["", ".", ".csv", ".xls", ".xlsx"])

    # run_chain returns (results, csv_path): the first value goes to the
    # textbox, the second to the downloadable file component.
    btn.click(run_chain, inputs=[prompt_input, keywords_file, categories_file], outputs=[txt_3, output_file])

demo.launch()
|