tomascufaro committed on
Commit
ad1fbe3
·
verified ·
1 Parent(s): 2f15652

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Parent directory
2
+ import sys
3
+ import os
4
+ import time
5
+ import pandas as pd # for data manipulation (pip install pandas)
6
+ import matplotlib.pyplot as plt
7
+ from random import randint
8
+ from urllib.parse import urlparse
9
+ import numpy as np
10
+ from langchain.chat_models import ChatOpenAI
11
+ from langchain.chains import create_extraction_chain
12
+ from langchain.llms import OpenAI
13
+ from langchain.chat_models import ChatOpenAI
14
+ from typing import Optional
15
+ from langchain.chains.openai_functions import (
16
+ create_structured_output_chain, create_tagging_chain_pydantic
17
+ )
18
+ from langchain.prompts import ChatPromptTemplate
19
+ import gradio as gr
20
+ from collections import defaultdict
21
+
22
# Extraction schema handed to the chain: each extracted record carries a
# "keyword" and a "category", both strings and both required.
schema = dict(
    properties=dict(
        keyword=dict(type="string"),
        category=dict(type="string"),
    ),
    required=["keyword", "category"],
)

# Chat messages that make up the prompt template. The placeholders
# {prompt_input}, {categories} and {keywords} are filled in per call.
_SYSTEM_MESSAGE = (
    "system",
    "You are an expert marketing researcher specialized in the finance industry",
)
_TASK_MESSAGE = (
    "human",
    """{prompt_input}.
Here you have the categories splitted by coma: {categories}.
and Here you have the keywords splitted by coma: {keywords}.""",
)
_REMINDER_MESSAGE = (
    "human",
    "Tip: Make sure to answer in the correct format and DO NOT leave keywords without category and DO NOT skip keywords. Please categorize all the keywords that I give you, each keyword must have just one and only one category.",
)

prompt = ChatPromptTemplate.from_messages(
    [_SYSTEM_MESSAGE, _TASK_MESSAGE, _REMINDER_MESSAGE]
)

# The API key is read from the OpenAI_APIKEY environment variable; a missing
# variable raises KeyError at import time.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=os.environ['OpenAI_APIKEY'],
)

# Module-level extraction chain shared by every request.
chain = create_extraction_chain(schema, llm, prompt, verbose=1)
44
+
45
def _read_first_column(path):
    """Return the non-empty cells of the first column of a CSV/Excel file as strings."""
    try:
        frame = pd.read_csv(path)
    except Exception:
        # Not parseable as CSV (for example an .xlsx upload): retry as Excel.
        frame = pd.read_excel(path)
    # Empty cells load as NaN and numeric cells as numbers; both would break
    # the ','.join(...) used to build the prompt, so drop/convert them here.
    return frame.iloc[:, 0].dropna().astype(str).tolist()


def run_chain(input_prompt, keywords_file, categories_file, batch_size=50):
    """Categorize the uploaded keywords in batches with the module-level ``chain``.

    Args:
        input_prompt: Instruction text substituted into the prompt template.
        keywords_file: Uploaded file object; its ``.name`` is read as a path to
            a CSV or Excel file whose first column holds the keywords.
        categories_file: Uploaded file object; its ``.name`` is read as a path
            to a CSV or Excel file whose first column holds the categories.
        batch_size: Number of keywords sent to the chain per call.

    Returns:
        A tuple ``(results, 'themes_results.csv')`` where ``results`` is the
        concatenation of what the chain returned for every batch, and the
        second item is the name of the CSV file written by ``results_to_csv``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    batch_size = int(batch_size)
    if batch_size < 1:
        # A zero or negative step would never advance through the keywords.
        raise ValueError('batch_size must be a positive integer')

    keywords = _read_first_column(keywords_file.name)
    categories = _read_first_column(categories_file.name)
    joined_categories = ','.join(categories)  # identical for every batch

    results = []
    for start in range(0, len(keywords), batch_size):
        # Slicing clips at the end of the list, so the last batch may be short.
        batch = keywords[start:start + batch_size]
        try:
            result = chain.run({
                'prompt_input': input_prompt,
                'categories': joined_categories,
                'keywords': ','.join(batch),
            })
        except Exception as E:
            # Best effort: report the failed batch and continue with the rest.
            print('this batch did not worked from {} to {}'.format(start, start + batch_size))
            print(E)
            result = []
        results += result
        # Persist after every batch so partial results are kept on disk.
        results_to_csv(results)

    if not keywords:
        # Nothing was processed; still write the file so the returned path exists.
        results_to_csv(results)
    return results, 'themes_results.csv'
75
+
76
def results_to_csv(results, path='themes_results.csv'):
    """Write the extracted records to a CSV file, one row per record.

    Args:
        results: Iterable of dicts (one per extracted item). Records may have
            different keys; a key missing from a record becomes an empty cell
            in that row.
        path: Destination file; defaults to ``'themes_results.csv'`` in the
            current working directory.
    """
    # Building the frame from the records (rather than from per-key lists)
    # keeps rows aligned even when a record lacks one of the keys.
    pd.DataFrame(list(results)).to_csv(path, index=False)
82
+
83
+
84
# Gradio UI: an editable instruction prompt, two file uploads (keywords and
# categories), and a button that runs run_chain and shows its two outputs.
with gr.Blocks() as demo:
    # Default instruction text; passed to run_chain as input_prompt.
    prompt_input = gr.Text("""I need your help to analyze and categorize the provided list of keywords
into the appropriate categories.
The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.
Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.""")
    gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
    # Extensions need the leading dot, matching the categories uploader below.
    keywords_file = gr.File(file_types=['.csv', '.xlsx'], label='keywords')
    gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the categories in one column. Should have a header")
    categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')
    with gr.Accordion("Open for More!"):
        gr.Markdown("Look at me...")

    btn = gr.Button(value="run")
    txt_3 = gr.Textbox(value="", label="Output")
    output_file = gr.File(
        label="Output File",
        file_count="single",
        file_types=["", ".", ".csv", ".xls", ".xlsx"],
    )

    # run_chain returns (results, csv_path): the first fills the textbox,
    # the second is offered as the downloadable file.
    btn.click(
        run_chain,
        inputs=[prompt_input, keywords_file, categories_file],
        outputs=[txt_3, output_file],
    )

demo.launch()