Spaces:
Running
Running
File size: 4,308 Bytes
03d6e86 caea1f5 ede2957 03d6e86 bafee93 57c5821 49124ad 7413ee9 de6d203 57c5821 49124ad 57c5821 5f82549 ede2957 91e5f2f ede2957 ef37c27 ede2957 ef37c27 ede2957 b1a6dfa de6d203 048e2e2 03d6e86 ede2957 91e5f2f bafee93 57c5821 b1a6dfa 5f82549 57c5821 caea1f5 5f82549 57c5821 b1a6dfa bafee93 91e5f2f b1a6dfa 91e5f2f 03d6e86 57c5821 03d6e86 9adca6f 03d6e86 57c5821 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import datetime
import html

import pandas as pd
import streamlit as st
import torch
from rapidfuzz import process, fuzz
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from transformers import pipeline
# Load the CSV file
df = pd.read_csv("anomalies.csv", quotechar='"')
# Keep only rows whose 'real' value is at least 1,000,000 (one million).
# .copy() detaches the filtered rows from the frame they were selected from,
# so the column assignment below writes to an independent DataFrame instead of
# a slice (which triggers pandas' SettingWithCopyWarning).
df = df[df['real'] >= 1_000_000.0].copy()
# Render 'real' as a fixed-point string with two decimals
df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
# Replace missing values with empty strings and convert every column to str
# (presumably because the table-QA pipeline expects text cells -- confirm).
df = df.fillna('').astype(str)
print(df)
# Drop stopwords from a piece of text
def remove_stopwords(text, stopwords=ENGLISH_STOP_WORDS):
    """Return *text* with every stopword removed.

    The text is split on whitespace. A token is dropped when its lowercase
    form is a member of *stopwords*; the remaining tokens keep their original
    casing and are joined back together with single spaces.
    """
    kept_tokens = []
    for token in text.split():
        if token.lower() in stopwords:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
# Function to filter DataFrame by fuzzy-matching the question words against every column
def filter_dataframe(df, user_question, threshold=80):
    """Return the rows of *df* in which any cell fuzzily matches a question word.

    Stopwords are removed from *user_question* and the remaining words are
    compared against every cell of every column with RapidFuzz's
    ``token_sort_ratio``. A row is kept as soon as one of its cells scores at
    least *threshold* for one of the words.

    Args:
        df: DataFrame to filter; its cells are passed to RapidFuzz as choices.
        user_question: Free-text question typed by the user.
        threshold: Minimum similarity score for a cell to count as a match.

    Returns:
        The subset of *df* (original index preserved) with at least one
        matching cell; an empty frame when nothing matches or when the
        question contains no non-stopword.
    """
    # dict.fromkeys de-duplicates while keeping the first-seen order, so a
    # word repeated in the question is not scored against the table twice.
    question_words = list(dict.fromkeys(remove_stopwords(user_question).split()))
    mask = pd.Series(False, index=df.index)
    if not question_words:
        return df[mask]

    for column in df.columns:
        choices = df[column]
        for word in question_words:
            # score_cutoff lets RapidFuzz discard low-scoring cells itself
            # instead of returning every cell for filtering in Python.
            matches = process.extract(
                word,
                choices,
                scorer=fuzz.token_sort_ratio,
                score_cutoff=threshold,
                limit=None,
            )
            # For a Series, the third element of each match is the index label
            # of the matching cell, so the mask stays aligned with df.index.
            mask.loc[[label for _, _, label in matches]] = True
    return df[mask]
# Function to generate a response using the TAPAS model
@st.cache_resource
def _load_tqa_pipeline():
    """Build the TAPAS table-question-answering pipeline once and reuse it.

    Streamlit re-executes the script on every interaction; caching the
    pipeline as a resource avoids re-loading the model for each question.
    """
    return pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})


def response(user_question, df):
    """Answer *user_question* from the rows of *df* relevant to it.

    The table is first narrowed with ``filter_dataframe``; the remaining rows
    are handed to the TAPAS pipeline together with the question.

    Args:
        user_question: Free-text question typed by the user.
        df: Table to query.

    Returns:
        A dict with the single key ``"Resposta"`` holding the model's answer,
        or a fixed apology message when no rows match the question or the
        pipeline raises ``ValueError``.
    """
    started = datetime.datetime.now()
    # Filter the DataFrame dynamically by user question
    subset_df = filter_dataframe(df, user_question)
    # Nothing relevant in the table: answer without touching the model
    if subset_df.empty:
        return {"Resposta": "Desculpe, não há dados disponíveis para responder à sua pergunta."}
    # Fetch the (cached) TAPAS pipeline
    tqa = _load_tqa_pipeline()
    # Debugging information
    print("Filtered DataFrame shape:", subset_df.shape)
    print("Filtered DataFrame head:\n", subset_df.head())
    print("User question:", user_question)
    # Query the TAPAS model
    try:
        answer = tqa(table=subset_df, query=user_question)['answer']
    except ValueError as e:
        print(f"Error: {e}")
        answer = "Desculpe, ocorreu um erro ao processar sua pergunta."
    query_result = {
        "Resposta": answer
    }
    finished = datetime.datetime.now()
    print("Time taken:", finished - started)
    return query_result
# Streamlit interface: page header, question box, clear button and chat log
st.markdown("""
<div style='display: flex; align-items: center;'>
<div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
<div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
<div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
<span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
</div>
""", unsafe_allow_html=True)

# Chat history, kept in the session so it survives script reruns
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Input box for user question
user_question = st.text_input("Escreva sua questão aqui:", "")

# The text box keeps its value across reruns (e.g. when "Limpar" is clicked),
# so answer a question only when it differs from the last one handled.
# Otherwise every rerun would re-query the model and re-append the exchange.
if user_question and st.session_state.get('last_question') != user_question:
    st.session_state['last_question'] = user_question
    st.session_state['history'].append(('👤', user_question))
    bot_response = response(user_question, df)["Resposta"]
    st.session_state['history'].append(('🤖', bot_response))

# Clear history button
if st.button("Limpar"):
    st.session_state['history'] = []

# Display chat history. This loop is the only place messages are rendered,
# so the newest exchange is not shown twice.
for sender, message in st.session_state['history']:
    if sender == '👤':
        st.markdown(f"**👤 {message}**")
    elif sender == '🤖':
        # Markdown emphasis is not parsed inside a raw HTML block, so bold is
        # expressed with <strong>. The answer is escaped because it is
        # injected into markup rendered with unsafe_allow_html=True.
        st.markdown(
            f"<div style='text-align: right'><strong>🤖 {html.escape(str(message))}</strong></div>",
            unsafe_allow_html=True,
        )
|