Spaces:
Sleeping
Sleeping
File size: 6,885 Bytes
3852d49 3683f9c 3852d49 3683f9c 2162f38 3683f9c 3852d49 3683f9c 2162f38 3683f9c 2162f38 3683f9c 2162f38 3852d49 3938c4d d2f3083 3852d49 d2f3083 3852d49 3938c4d 3852d49 3938c4d 2162f38 d2f3083 2162f38 d2f3083 2162f38 d2f3083 3938c4d d2f3083 2162f38 d2f3083 3938c4d 3852d49 d2f3083 3938c4d d2f3083 3938c4d d2f3083 3852d49 3683f9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import gradio as gr
import pandas as pd
import re
from huggingface_hub import InferenceClient
import plotly.express as px
from collections import Counter
# Initialize Hugging Face client
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def parse_message(message):
"""Extract information from a chat message using regex."""
info = {}
# Extract timestamp and phone number
timestamp_match = re.search(r'\[(.*?)\]', message)
phone_match = re.search(r'\] (.*?):', message)
if timestamp_match and phone_match:
info['timestamp'] = timestamp_match.group(1)
info['phone'] = phone_match.group(1)
# Extract rest of the message
content = message.split(':', 1)[1].strip()
# Extract name
name_match = re.match(r'^([^•\n-]+)', content)
if name_match:
info['name'] = name_match.group(1).strip()
# Extract affiliation
affiliation_match = re.search(r'[Aa]ffiliation:?\s*([^•\n]+)', content)
if affiliation_match:
info['affiliation'] = affiliation_match.group(1).strip()
# Extract research field/interests
field_match = re.search(r'([Ff]ield of [Ii]nterest|[Dd]omaine de recherche|[Rr]esearch area|[Aa]reas of interest):?\s*([^•\n]+)', content)
if field_match:
info['research_field'] = field_match.group(2).strip()
# Extract thesis topic
thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
if thesis_match:
info['thesis_topic'] = thesis_match.group(1).strip()
return info
def create_researcher_df(chat_history):
"""Convert chat messages to structured DataFrame."""
researchers = []
messages = chat_history.split('\n')
for message in messages:
if message.strip():
info = parse_message(message)
if info:
researchers.append(info)
df = pd.DataFrame(researchers)
return df
def analyze_research_fields(df):
"""Analyze and categorize research fields."""
if 'research_field' not in df.columns:
return pd.Series()
fields = df['research_field'].dropna()
# Split fields and flatten
all_fields = [field.strip().lower() for fields_list in fields for field in fields_list.split(',')]
return pd.Series(Counter(all_fields))
def create_visualizations(df):
"""Create visualizations from the researcher data."""
figures = []
# 1. Affiliation Distribution
if 'affiliation' in df.columns and not df['affiliation'].empty:
affiliation_counts = df['affiliation'].value_counts()
fig_affiliation = px.pie(
values=affiliation_counts.values,
names=affiliation_counts.index,
title='Distribution of Researchers by Affiliation'
)
figures.append(fig_affiliation)
# 2. Research Fields Analysis
field_counts = analyze_research_fields(df)
if not field_counts.empty:
fig_fields = px.bar(
x=field_counts.index,
y=field_counts.values,
title='Popular Research Fields',
labels={'x': 'Field', 'y': 'Count'}
)
figures.append(fig_fields)
return figures[0] if figures else None
def analyze_chat_history(chat_history_text):
"""Analyze chat history and return DataFrame, plot, and summary."""
if not chat_history_text.strip():
return None, None, "No chat history provided."
df = create_researcher_df(chat_history_text)
if df.empty:
return None, None, "No data could be extracted from the chat history."
# Generate analysis summary
summary = f"Analysis of {len(df)} researchers:\n"
if 'affiliation' in df.columns:
summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"
field_counts = analyze_research_fields(df)
if not field_counts.empty:
top_fields = field_counts.nlargest(3)
summary += "- Top research fields:\n"
for field, count in top_fields.items():
summary += f" • {field}: {count} researchers\n"
# Create visualization
fig = create_visualizations(df)
return df, fig, summary
def process_message(
message,
chat_history,
system_message,
max_tokens,
temperature,
top_p
):
"""Process message and return response."""
try:
# Generate response using the LLM
messages = [{"role": "system", "content": system_message}]
for user_msg, bot_msg in chat_history:
messages.append({"role": "user", "content": user_msg})
messages.append({"role": "assistant", "content": bot_msg})
messages.append({"role": "user", "content": message})
response = client.chat_completion(
messages,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
)
bot_message = response.choices[0].message.content
chat_history.append((message, bot_message))
return chat_history
except Exception as e:
error_message = f"Error: {str(e)}"
chat_history.append((message, error_message))
return chat_history
with gr.Blocks(title="CohortBot") as demo:
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot(label="Chat History")
msg = gr.Textbox(label="Message", placeholder="Type your message here...")
with gr.Row():
system_msg = gr.Textbox(value="You are a friendly Research Community Chatbot.", label="System message")
with gr.Row():
max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
with gr.Column(scale=1):
chat_history_text = gr.Textbox(label="Chat History for Analysis", lines=10)
analyze_btn = gr.Button("Analyze Chat History", variant="primary")
with gr.Row():
analysis_text = gr.Textbox(label="Analysis Summary", lines=4)
with gr.Row():
researcher_table = gr.Dataframe(label="Extracted Researcher Data")
with gr.Row():
plot = gr.Plot(label="Community Analysis")
msg.submit(
process_message,
[msg, chatbot, system_msg, max_tokens, temperature, top_p],
[chatbot]
)
analyze_btn.click(
analyze_chat_history,
inputs=[chat_history_text],
outputs=[researcher_table, plot, analysis_text]
)
if __name__ == "__main__":
demo.launch() |