File size: 2,061 Bytes
8edd6e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9412f88
8edd6e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9412f88
8edd6e7
 
9412f88
8edd6e7
 
 
 
 
 
 
9412f88
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# bsky2M_emojis_streamlit.py

# Packages required 
import streamlit as st 
from datasets import load_dataset 
import emoji 
from dateutil import parser 
from collections import Counter 
import plotly.express as px 
import pandas as pd 
from collections import defaultdict 


# Page header: the app title, followed by the first illustration.
page_title = "Top 200 Most Frequent Emojis in Bluesky Posts"
st.title(page_title)

header_image = 'Bluesky Emoji Model 1.jpeg'
header_caption = 'Bluesky Emoji Model 1'
st.image(header_image, caption=header_caption)

# Step 1: Load the Dataset
# Identifier handed to load_dataset (imported from the `datasets` package).
DATASET_ID = "alpindale/two-million-bluesky-posts"
dataset = load_dataset(DATASET_ID)

# Every later step works on the 'train' split only.
data = dataset['train']

# Step 2: Extract Emojis from Text
def extract_emojis(text):
    """Return the emojis found in *text*.

    Args:
        text: The string to scan. ``None`` or an empty string is treated as
            containing no emojis.

    Returns:
        A list with one entry per match reported by ``emoji.emoji_list``,
        in the order that function reports them. An emoji that occurs
        several times therefore appears several times.
    """
    # Guard against missing text (e.g. a null value in a dataset row) so that
    # one bad row does not abort a whole mapping pass.
    if not text:
        return []
    return [match['emoji'] for match in emoji.emoji_list(text)]

# Add an "emojis" column holding the emojis found in each post's text.
def _attach_emojis(row):
    return {"emojis": extract_emojis(row["text"])}


data = data.map(_attach_emojis)


# Step 3: Convert created_at to Datetime
# Replace each 'created_at' string with the datetime parsed from it.
def _parse_created_at(row):
    return {"created_at": parser.isoparse(row["created_at"])}


data = data.map(_parse_created_at)

# Step 4: Count Emoji Frequencies
# Gather the emojis of every post into one flat list. The loop variable is
# deliberately not called `emoji`, which is the name of the imported module.
all_emojis = []
for post in data:
    all_emojis.extend(post["emojis"])

# Tally how often each emoji occurs.
emoji_counts = Counter()
emoji_counts.update(all_emojis)

# Step 5: Visualize Emoji Frequencies
# Get the top 200 most common emojis as (emoji, count) pairs, most frequent first.
top_emojis = emoji_counts.most_common(200)

# Split the pairs into two parallel tuples. zip(*[]) yields nothing, so
# unpacking it would raise ValueError when no emojis were counted; fall back
# to two empty tuples so the app renders an empty table instead of crashing.
emojis, counts = zip(*top_emojis) if top_emojis else ((), ())

# Create a DataFrame for Plotly
df = pd.DataFrame({'Emojis': emojis, 'Frequency': counts})

# Render the results table, framed by two illustrations.
image_above_table = 'Bluesky Emoji Model 4.jpeg'
st.image(image_above_table, caption='Bluesky Emoji Model 4')

table_heading = '### Top 200 Emojis Dataframe'
st.write(table_heading)
st.dataframe(df)

image_below_table = 'Bluesky Emoji Model 2.jpeg'
st.image(image_below_table, caption='Bluesky Emoji Model 2')

# Build the bar chart. The axis-update calls return the figure they were
# called on, so they are chained onto the px.bar() result.
fig = (
    px.bar(df, x='Emojis', y='Frequency', title='Top 200 Most Frequent Emojis')
    .update_xaxes(title_text='Emojis')
    .update_yaxes(title_text='Frequency')
)

# Show the third illustration first, then the chart beneath it.
chart_image = 'Bluesky Emoji Model 3.jpeg'
st.image(chart_image, caption='Bluesky Emoji Model 3')
st.plotly_chart(fig)