Spaces:
Runtime error
Runtime error
zayed-upal
committed on
Commit
·
4c25316
1
Parent(s):
325466a
Google ads format download added, topic name rename option added
Browse files
- Functionalities/TopicClustering.py +24 -3
- pages/2_Topic_Cluster.py +28 -1
Functionalities/TopicClustering.py
CHANGED
@@ -9,14 +9,16 @@ import plotly.graph_objects as go
|
|
9 |
|
10 |
class TopicClustering:
|
11 |
def __init__(self, keyword_df, text_col, representation_model, sentence_model):
|
|
|
12 |
self.topic_model = None
|
13 |
self.embeddings = None
|
|
|
14 |
self.keyword_df, self.text_col = keyword_df, text_col
|
15 |
self.sentence_model = SentenceTransformer(sentence_model)
|
16 |
self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
|
17 |
|
18 |
def topic_cluster_bert(self) -> None:
|
19 |
-
self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=
|
20 |
self.topic_model = BERTopic(representation_model=self.representation_model,
|
21 |
embedding_model=self.sentence_model,
|
22 |
n_gram_range=(1, 3), top_n_words=2)
|
@@ -34,7 +36,7 @@ class TopicClustering:
|
|
34 |
self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
|
35 |
self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
|
36 |
self.keyword_df.drop(columns=['CustomName'], inplace=True)
|
37 |
-
|
38 |
|
39 |
def visualize_documents(self, n_neighbors) -> go.Figure:
|
40 |
reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
|
@@ -46,5 +48,24 @@ class TopicClustering:
|
|
46 |
return fig
|
47 |
|
48 |
def visualize_topic_distribution(self) -> go.Figure:
|
49 |
-
fig = self.topic_model.visualize_barchart(custom_labels=True, top_n_topics=5, n_words=20,
|
|
|
50 |
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
class TopicClustering:
|
11 |
def __init__(self, keyword_df, text_col, representation_model, sentence_model):
    """Store the keyword table and build the models used for clustering.

    Args:
        keyword_df: Table of keywords to cluster; kept as ``self.keyword_df``.
        text_col: Name of the column in ``keyword_df`` that holds the text.
        representation_model: Passed to
            ``NLP_Helper.get_bertopic_representation`` to obtain the
            representation model.
        sentence_model: Passed to ``SentenceTransformer`` to build the
            embedding model.
    """
    # Input data and the column to read the keyword text from.
    self.keyword_df = keyword_df
    self.text_col = text_col

    # Results of clustering; nothing has been computed yet.
    self.topic_model = None
    self.embeddings = None
    self.topic_names = None

    # Pending renames (current topic name -> new topic name).
    self.topic_name_mapping = {}

    # Model construction is kept in the original order: sentence model
    # first, representation model second.
    self.sentence_model = SentenceTransformer(sentence_model)
    self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
|
19 |
|
20 |
def topic_cluster_bert(self) -> None:
|
21 |
+
self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=True)
|
22 |
self.topic_model = BERTopic(representation_model=self.representation_model,
|
23 |
embedding_model=self.sentence_model,
|
24 |
n_gram_range=(1, 3), top_n_words=2)
|
|
|
36 |
self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
|
37 |
self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
|
38 |
self.keyword_df.drop(columns=['CustomName'], inplace=True)
|
39 |
+
self.topic_names = topic_labels
|
40 |
|
41 |
def visualize_documents(self, n_neighbors) -> go.Figure:
|
42 |
reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
|
|
|
48 |
return fig
|
49 |
|
50 |
def visualize_topic_distribution(self) -> go.Figure:
    """Return the topic model's bar chart titled 'Topic Distribution'.

    The chart is requested with custom labels, limited to the top 5
    topics, with up to 20 words per topic.
    """
    barchart_options = {
        'custom_labels': True,
        'top_n_topics': 5,
        'n_words': 20,
        'title': 'Topic Distribution',
    }
    return self.topic_model.visualize_barchart(**barchart_options)
|
54 |
+
|
55 |
+
def update_topic_names(self):
    """Apply the pending renames in ``self.topic_name_mapping``.

    Every value of the 'Topic Name' column of ``self.keyword_df`` that is a
    key of ``self.topic_name_mapping`` is replaced by the mapped name.
    All renames are applied in one pass, so a rename never cascades into
    another one and swapping two names (``{'a': 'b', 'b': 'a'}``) works.

    Afterwards ``self.topic_names`` holds the resulting names as a list
    (original order, duplicates removed) and the pending mapping is reset
    to an empty dict. When no renames are pending the method does nothing,
    so the current topic names are kept.
    """
    renames = self.topic_name_mapping
    if not renames:
        return

    # Assign the whole column instead of using chained indexing
    # (df[col][mask] = ...), which may write to a temporary copy and is a
    # no-op under pandas copy-on-write.
    self.keyword_df['Topic Name'] = self.keyword_df['Topic Name'].map(
        lambda name: renames.get(name, name)
    )

    # Rename the known topic names the same way; names without a pending
    # rename are kept. dict.fromkeys removes duplicates (two topics renamed
    # to the same name) while preserving order.
    current_names = renames.keys() if self.topic_names is None else self.topic_names
    self.topic_names = list(
        dict.fromkeys(renames.get(name, name) for name in current_names)
    )
    self.topic_name_mapping = {}
|
61 |
+
|
62 |
+
def get_df_in_google_ads_format(self, campaign_name, match_type='Phrase',
                                action='Add', keyword_status='Enabled'):
    """Build a keyword table laid out for a Google Ads bulk upload.

    Each row of ``self.keyword_df`` becomes one row of the result: its
    'Topic Name' is used as the ad group and its ``self.text_col`` value as
    the keyword. The remaining columns hold the same value on every row.
    ``self.keyword_df`` is not modified.

    Args:
        campaign_name: Value written to the 'Campaign' column.
        match_type: Value written to the 'Match Type' column.
        action: Value written to the 'Action' column.
        keyword_status: Value written to the 'Keyword status' column.

    Returns:
        A new DataFrame with the columns 'Action', 'Keyword status',
        'Campaign', 'Ad group', 'Keyword', 'Match Type', in that order.
    """
    column_order = ['Action', 'Keyword status', 'Campaign', 'Ad group', 'Keyword', 'Match Type']

    # Start from the per-row data so the frame gets its index directly,
    # rather than assigning columns into an empty frame.
    ads_df = pd.DataFrame({
        'Ad group': self.keyword_df['Topic Name'],
        'Keyword': self.keyword_df[self.text_col],
    })

    # Constant columns are broadcast to every row.
    ads_df['Match Type'] = match_type
    ads_df['Action'] = action
    ads_df['Keyword status'] = keyword_status
    ads_df['Campaign'] = campaign_name

    return ads_df[column_order]
|
pages/2_Topic_Cluster.py
CHANGED
@@ -4,6 +4,7 @@ from Functionalities import NLP_Helper
|
|
4 |
from Functionalities.TopicClustering import TopicClustering
|
5 |
from streamlit_extras.dataframe_explorer import dataframe_explorer
|
6 |
|
|
|
7 |
class TopicClusterView:
|
8 |
def __init__(self):
|
9 |
self.n_neighbors = 10
|
@@ -60,14 +61,39 @@ class TopicClusterView:
|
|
60 |
if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
|
61 |
filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
|
62 |
st.dataframe(filtered_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
st.download_button(
|
64 |
-
"Press to Download",
|
65 |
st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
|
66 |
"Clustered.csv",
|
67 |
"text/csv",
|
68 |
key='download-csv'
|
69 |
)
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
def visualize_clusters(self):
|
72 |
if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
|
73 |
self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
|
@@ -85,6 +111,7 @@ class TopicClusterView:
|
|
85 |
fig = st.session_state.topic_cluster.visualize_topic_distribution()
|
86 |
st.plotly_chart(fig, use_container_width=True, theme=None)
|
87 |
|
|
|
88 |
if __name__ == '__main__':
|
89 |
topic_cluster_view = TopicClusterView()
|
90 |
# tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])
|
|
|
4 |
from Functionalities.TopicClustering import TopicClustering
|
5 |
from streamlit_extras.dataframe_explorer import dataframe_explorer
|
6 |
|
7 |
+
|
8 |
class TopicClusterView:
|
9 |
def __init__(self):
|
10 |
self.n_neighbors = 10
|
|
|
61 |
if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
|
62 |
filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
|
63 |
st.dataframe(filtered_df)
|
64 |
+
with st.expander("Rename Topics"):
|
65 |
+
for topic_name in st.session_state.topic_cluster.topic_names:
|
66 |
+
cur_topic_col, new_topic_col = st.columns(2)
|
67 |
+
with cur_topic_col:
|
68 |
+
cur_topic_col.write(topic_name)
|
69 |
+
with new_topic_col:
|
70 |
+
st.session_state.topic_cluster.topic_name_mapping[topic_name] = \
|
71 |
+
st.text_input("New topic name", topic_name)
|
72 |
+
|
73 |
+
if st.button("Update Topic Names"):
|
74 |
+
st.session_state.topic_cluster.update_topic_names()
|
75 |
+
st.experimental_rerun()
|
76 |
+
|
77 |
st.download_button(
|
78 |
+
"Press to Download as CSV",
|
79 |
st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
|
80 |
"Clustered.csv",
|
81 |
"text/csv",
|
82 |
key='download-csv'
|
83 |
)
|
84 |
|
85 |
+
with st.expander("Download as CSV for Bulk upload in Google Ads"):
|
86 |
+
campaign_name = st.text_input("Campaign Name", "Demo Campaign")
|
87 |
+
st.dataframe(st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name))
|
88 |
+
st.download_button(
|
89 |
+
"Download as CSV for Bulk upload in Google Ads",
|
90 |
+
st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name).to_csv(
|
91 |
+
index=False).encode('utf-8'),
|
92 |
+
f"{campaign_name}_keywords_upload.csv",
|
93 |
+
"text/csv",
|
94 |
+
key='download-google-csv'
|
95 |
+
)
|
96 |
+
|
97 |
def visualize_clusters(self):
|
98 |
if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
|
99 |
self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
|
|
|
111 |
fig = st.session_state.topic_cluster.visualize_topic_distribution()
|
112 |
st.plotly_chart(fig, use_container_width=True, theme=None)
|
113 |
|
114 |
+
|
115 |
if __name__ == '__main__':
|
116 |
topic_cluster_view = TopicClusterView()
|
117 |
# tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])
|