alonsosilva committed on
Commit
cf23c39
·
1 Parent(s): b3f4f85
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. README.md +1 -0
  3. app.py +53 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11

# Set up a new user named "user" with user ID 1000 for permission
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Work from a directory under the user's home instead of the root
# filesystem, so the files copied below and anything the app writes next to
# them live in a location owned by "user".
WORKDIR $HOME/app

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Copy the requirements first so the dependency layer is cached independently
# of changes to app.py.
COPY --chown=user requirements.txt .

# Install requirements
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user app.py app.py

# Serve the Solara app on all interfaces; the port matches `app_port` in README.md.
ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: green
6
  sdk: docker
7
  pinned: false
8
  license: mit
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ app_port: 7860
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import solara
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer
6
+ from huggingface_hub import snapshot_download
7
+ from umap import UMAP
8
+ from annoy import AnnoyIndex
9
+ from cluestar import plot_text
10
+
# Upper bound on how many titles are embedded, projected and indexed at start-up.
MAX_TEXTS = 500

# Load the headlines and keep only the non-missing titles as plain strings.
news = pd.read_csv('https://raw.githubusercontent.com/alonsosilvaallende/fake-and-real-news-titles/main/example.csv')
texts = news["title"].dropna().astype(str).tolist()

# Fetch only the config files and the PyTorch weights of the embedding model.
model_path = snapshot_download(
    repo_id="TaylorAI/gte-tiny", allow_patterns=["*.json", "pytorch_model.bin"]
)

embedder2 = SentenceTransformer(model_path)
# Encode the titles in one batched call; slicing (rather than indexing up to a
# fixed count) keeps this working when fewer than MAX_TEXTS titles are available.
embeddings2 = embedder2.encode(texts[:MAX_TEXTS])

# Project the embeddings with UMAP's default settings for plotting.
reducer = UMAP()
X2 = reducer.fit_transform(embeddings2)

# Build an angular-distance Annoy index whose item ids are positions in `texts`.
f = len(embeddings2[0])
t = AnnoyIndex(f, 'angular')
for i, embedded_text in enumerate(embeddings2):
    t.add_item(i, embedded_text)
t.build(1000)

# Reactive state holding the current search query; pre-filled with an example.
query = solara.reactive("What did Nancy Pelosi said about Obamacare?")
@solara.component
def Page():
    """Render the embedding explorer page.

    Shows a text input bound to the module-level ``query`` reactive value.
    When the query is non-empty, it is embedded, its 10 nearest neighbours
    are looked up in the Annoy index ``t``, and the query is projected with
    the fitted ``reducer`` so it can be plotted next to the corpus points in
    ``X2``. A table of the neighbours and their distances is shown below the
    chart. When the query is empty, only the corpus points are plotted.
    """
    # Derive the number of plotted points from the projection itself so the
    # labels and colours always line up with the rows of X2.
    n_points = len(X2)
    corpus = texts[:n_points]

    with solara.Column(margin=10):
        solara.Markdown("#Embeddings")
        solara.InputText("Enter some query:", query, continuous_update=True)
        if query.value != "":
            embedded_query = embedder2.encode(query.value)
            idx, distances = t.get_nns_by_vector(embedded_query, 10, include_distances=True)
            df_neighbors = pd.DataFrame(
                {
                    "neighbors": [texts[i] for i in idx],
                    "distances": distances,
                }
            )
            # Place the query in the same 2-D space as the corpus points.
            x = reducer.transform([embedded_query])
            # Set membership keeps the colour labelling linear in the corpus size.
            neighbor_ids = set(idx)
            color_array = [
                "neighbors" if i in neighbor_ids else "texts" for i in range(n_points)
            ] + ["query"]
            chart = plot_text(
                np.vstack((X2, x)), corpus + [query.value], color_array=color_array
            )
            # NOTE(review): the palette is presumably matched to the colour labels
            # in sorted order (neighbors, query, texts) - confirm against the chart.
            solara.AltairChart(
                chart.configure_range(category=['#0000ff', '#ff0000', '#a0aab4'])
            )
            solara.DataFrame(df_neighbors, items_per_page=10)
            solara.Markdown("Dataset: 'Fake and real news' from [kaggle](https://www.kaggle.com/datasets/clmentbisaillon/fake-and-real-news-dataset)")
        else:
            color_array = ["texts"] * n_points
            solara.AltairChart(plot_text(X2, corpus, color_array=color_array))
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
solara
numpy
pandas
sentence-transformers
# Imported directly by app.py (snapshot_download), so declare it explicitly.
huggingface_hub
annoy
cluestar
# `from umap import UMAP` is provided by umap-learn. The PyPI package named
# "umap" is an unrelated project that installs a conflicting top-level `umap`
# module, so it must not be listed here.
umap-learn