"""Gradio front end for clustering product descriptions from a CSV/Excel file.

The app exposes four tabs: BM25-based clustering, a chat interface, a topic
analysis form, and a BERT-based clustering tab. The heavy lifting is delegated
to ``process_file_bm25``, ``process_file_bert``, ``generate_plot`` and
``generate`` from the project-local ``functions`` module.
"""
import gradio as gr
import pandas as pd

from functions import process_file_bm25, process_file_bert, generate_plot, generate

# ------------------------------------------------------
# Placeholder state objects holding small sample frames.
# NOTE(review): nothing in this file writes new values into these states, so
# consumers below only ever see the sample data — confirm whether the
# clustering callbacks are expected to update them.
df_bm25 = gr.State(
    value=pd.DataFrame({"Products": [1, 2, 3], "column2": ["A", "B", "C"]})
)
df_topics_bm25 = gr.State(
    value=pd.DataFrame({"Topic": [1, 2, 3], "column2": ["A", "B", "C"]})
)

# Introductory text shown at the top of the BM25 tab.
_INTRO_HTML = """
This module helps to quickly cluster the products in any excel/csv file for product wise analysis for any NAC(National Assessment centre) of CBIC Indian Customs.
"""

with gr.Blocks() as bm25:
    with gr.Row():
        with gr.Column():
            # gr.HTML has no `markup` keyword; passing it raised inside a bare
            # `except`, so the intro text was silently never rendered.
            gr.HTML(_INTRO_HTML)
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """
            )
            inputfile = gr.File(
                file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file"
            )

            def confirmation(file):
                """Validate an uploaded file and report the result.

                Args:
                    file: The uploaded file object handed over by ``gr.File``;
                        its ``name`` attribute is used as the path on disk.

                Returns:
                    A ``(path, message)`` pair. ``path`` is the uploaded file
                    path on success and ``None`` on failure, so it can be
                    shown by a ``gr.File`` output; ``message`` is the status
                    text for the textbox.
                """
                path = file.name
                lowered = path.lower()
                if lowered.endswith('.csv'):
                    df = pd.read_csv(path)
                elif lowered.endswith(('.xls', '.xlsx')):
                    df = pd.read_excel(path)
                else:
                    doc = "Unsupported file format. Please provide a CSV or Excel file."
                    return None, doc

                # Compare case-insensitively; str() guards against non-string
                # column labels (e.g. numeric headers).
                column_names = {str(column).lower() for column in df.columns}
                if 'products' not in column_names:
                    doc = "The input file must have a column named 'products'."
                    return None, doc

                doc = 'File uploaded! Press Cluster button'
                # A gr.File output expects a path, not a DataFrame.
                return path, doc

            def download_df():
                """Return the DataFrame currently stored in ``df_bm25``."""
                # Read the wrapped value; returning the gr.State component
                # itself cannot be rendered by a gr.Dataframe output.
                df1 = df_bm25.value
                print(df1)
                return df1

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(
                confirmation,
                inputs=[inputfile],
                outputs=[gr.File(label="Uploaded File"), out],
            )
    with gr.Row():
        min_cluster_size = gr.Slider(
            2, 500, value=5, step=1,
            label="min_cluster_size",
            info=(
                "Choose minimum No. of docs in a cluster. "
                "Lower the value ,higher the clusters created"
            ),
        )
        top_n_words = gr.Slider(
            1, 25, value=10, step=1,
            label="top_n_words",
            info="Choose no of key words for a cluster",
        )
        ngram = gr.Slider(
            1, 3, value=2, step=1,
            label="ngram",
            info="Choose no of n-grams words to be taken for clustering",
        )
    cluster_btn = gr.Button(value="Cluster")
    # Original author's note on the callback's results:
    # [df, topics_info, barchart, topics_plot, heatmap, hierarchy]
    # NOTE(review): seven output components are wired below — confirm this
    # matches what process_file_bm25 actually returns.
    tup = cluster_btn.click(
        process_file_bm25,
        inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
        outputs=[
            gr.Dataframe(),
            gr.File(label="Download CSV"),
            gr.Dataframe(),
            gr.Plot(label="Barchart"),
            gr.Plot(label="Topics Plot"),
            gr.Plot(label="Heatmap"),
            gr.Plot(label="Hierarchy"),
        ],
    )
    llm_btn = gr.Button(value="Download Excel with Topics ")
    llm_btn.click(download_df, inputs=[], outputs=gr.Dataframe(label="Output"))

with gr.Blocks() as bert:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """
            )
            inputfile = gr.File(
                file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file"
            )

            def confirmation():
                """Return the fixed status message shown after an upload."""
                doc = 'File uploaded! Press Cluster button'
                return doc

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(confirmation, inputs=[], outputs=out)
    with gr.Row():
        with gr.Column():
            min_cluster_size = gr.Slider(
                1, 100, value=5, step=1,
                label="min_cluster_size",
                info=(
                    "Choose minimum No. of docs in a cluster. "
                    "Lower the value ,higher the clusters created"
                ),
            )
        with gr.Column():
            top_n_words = gr.Slider(
                1, 25, value=10, step=1,
                label="top_n_words",
                info="Choose no of key words for a cluster",
            )
        with gr.Column():
            ngram = gr.Slider(
                1, 3, value=2, step=1,
                label="ngram",
                info="Choose no of n-grams words to be taken for clustering",
            )
    cluster_btn = gr.Button(value="Cluster")
    # Original author's note on the callback's results:
    # [df, topics_info, barchart, topics_plot, heatmap, hierarchy]
    # Only three of the controls above are passed to the BERT callback.
    tup = cluster_btn.click(
        process_file_bert,
        inputs=[inputfile, mode, min_cluster_size],
        outputs=[
            gr.Dataframe(),
            gr.Dataframe(),
            gr.Plot(label="Barchart"),
            gr.Plot(label="Topics Plot"),
            gr.Plot(label="Heatmap"),
            gr.Plot(label="Hierarchy"),
        ],
    )

# ___________________________________________
# Extra generation controls forwarded to `generate` by the chat interface.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=1,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

# Each example row is the prompt followed by one (unset) value per additional
# input.
examples = [
    [
        "I'm planning a vacation to Japan. Can you suggest a one-week itinerary "
        "including must-visit places and local cuisines to try?",
        None, None, None, None, None,
    ],
    [
        "Can you write a short story about a time-traveling detective who "
        "solves historical mysteries?",
        None, None, None, None, None,
    ],
    [
        "I'm trying to learn French. Can you provide some common phrases that "
        "would be useful for a beginner, along with their pronunciations?",
        None, None, None, None, None,
    ],
    [
        "I have chicken, rice, and bell peppers in my kitchen. "
        "Can you suggest an easy recipe I can make with these ingredients?",
        None, None, None, None, None,
    ],
    [
        "Can you explain how the QuickSort algorithm works and provide a "
        "Python implementation?",
        None, None, None, None, None,
    ],
    [
        "What are some unique features of Rust that make it stand out compared "
        "to other systems programming languages like C++?",
        None, None, None, None, None,
    ],
]

chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
)

# ______________________________________________________
# Topic analysis form. The dropdown choices are computed once, at import time,
# from the sample frame stored in `df_topics_bm25`.
df = df_topics_bm25.value
print(df)
excel_analysis_bm25 = gr.Interface(
    fn=generate_plot,
    inputs=[
        gr.Dropdown(
            df['Topic'].unique().tolist(),
            label="Select Topic Number",
            type="index",
        ),
        gr.Dropdown(
            list(df.columns[~df.columns.isin(['Topic'])]),
            label="Select X Axis",
            type="index",
        ),
        gr.Dropdown(
            list(df.columns[~df.columns.isin(['Topic'])]),
            label="Select Y Axis",
            type="index",
        ),
        gr.Radio(
            ["scatter", "bar", "line", "box", "wordcloud", "pie"],
            label="Select Chart Type",
        ),
        gr.Dropdown(
            ["count", "count_distinct", "sum", "average"],
            label="Select Aggregation Function",
        ),
    ],
    outputs=gr.Plot(label="Visualization"),
)

demo = gr.TabbedInterface(
    [bm25, chat_interface, excel_analysis_bm25, bert],
    [
        "TFIDF-BM25 Clustering",
        "TFIDF-BM25-Topics AI",
        "TFIDF-BM25-Topic analysis",
        "keyBERT",
    ],
)
demo.launch(share=True, debug=True)