Qifan Zhang committed
Commit 0e97d35
1 Parent(s): 8cd5cbf

update p2_flexibility, ui
Browse files
- .gitignore +4 -2
- app.py +9 -11
- description.txt → data/description.txt +0 -0
- data/example.csv +10 -0
- utils/models.py +4 -4
- utils/pipeline.py +20 -5
.gitignore
CHANGED
@@ -1,3 +1,5 @@
-data
 .idea
-
+data/example
+data/tmp
+
+output.csv
app.py
CHANGED
@@ -50,18 +50,13 @@ def process(task_name: str,
         return {'Error': e}, None, None


+# input
 task_name_dropdown = gr.components.Dropdown(
     label='Task Name',
     value='Originality',
     choices=['Originality', 'Flexibility']
 )

-model_name_input = gr.components.Textbox(
-    value='paraphrase-multilingual-MiniLM-L12-v2',
-    lines=1,
-    type='text'
-)
-
 model_name_dropdown = gr.components.Dropdown(
     label='Model Name',
     value=list_models[0],
@@ -69,11 +64,16 @@ model_name_dropdown = gr.components.Dropdown(
 )

 text_input = gr.components.Textbox(
-    value='
+    value=open('data/example.csv', 'r').read(),
     lines=10,
     type='text'
 )

+# output
+file_input = gr.components.File(label='Input File',
+                                file_count='single',
+                                file_types=['', '.', '.csv', '.xls', '.xlsx'])
+
 text_output = gr.components.Textbox(
     label='Output',
     type='text'
@@ -83,16 +83,14 @@ dataframe_output = gr.components.Dataframe(
     label='DataFrame'
 )

-description = open('description.txt', 'r').read()
-
 file_output = gr.components.File(label='Output File',
                                  file_count='single',
                                  file_types=['', '.', '.csv', '.xls', '.xlsx'])

 app = gr.Interface(
     fn=process,
-    inputs=[task_name_dropdown, model_name_dropdown, text_input,
+    inputs=[task_name_dropdown, model_name_dropdown, text_input, file_input],
     outputs=[text_output, dataframe_output, file_output],
-    description=description
+    description=open('data/description.txt', 'r').read()
 )
 app.launch()
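The body of process() is not part of this diff, but the new file_input is only useful if the handler prefers an uploaded CSV over the pasted text. A minimal, hypothetical sketch of that precedence (load_input and its parameters are illustrative, not code from the repo; the .name temp-path attribute is how gradio's File component typically exposes the upload):

import pandas as pd
from io import StringIO

def load_input(text: str, file) -> pd.DataFrame:
    # hypothetical helper: prefer the uploaded file, fall back to the textbox
    if file is not None:
        return pd.read_csv(file.name)       # gradio passes a temp file with a .name path
    return pd.read_csv(StringIO(text))      # otherwise parse the pasted CSV text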
description.txt → data/description.txt
RENAMED
File without changes
data/example.csv
ADDED
@@ -0,0 +1,10 @@
+id,prompt,response
+1,床单,过滤器
+1,床单,做成渔网捞鱼
+1,床单,做成枕头
+1,牙刷,捅人
+1,牙刷,用作鞋拔
+1,牙刷,当飞镖扔
+2,床单,做被子
+2,床单,保暖
+2,床单,绑在树上做成吊床
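A small sketch, not part of the commit, showing how this example file groups for the flexibility task (pandas only, no model needed):

import pandas as pd

df = pd.read_csv('data/example.csv')        # columns: id, prompt, response
# p1_flexibility scores each (id, prompt) group of responses together:
for (pid, prompt), group in df.groupby(['id', 'prompt']):
    print(pid, prompt, list(group['response']))
# three groups of three responses each: (1, 床单), (1, 牙刷), (2, 床单)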
utils/models.py
CHANGED
@@ -1,6 +1,6 @@
-import numpy as np
-import torch
 from functools import lru_cache
+
+import torch
 from sentence_transformers import SentenceTransformer

 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -20,6 +20,6 @@ class SBert:
         self.model = SentenceTransformer(path, device=DEVICE)

     @lru_cache(maxsize=10000)
-    def __call__(self, x) ->
-        y = self.model.encode(x)
+    def __call__(self, x) -> torch.Tensor:
+        y = self.model.encode(x, convert_to_tensor=True)
         return y
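A usage sketch of the updated encoder (illustrative: the model name is the old default from app.py, SBert is assumed to accept it directly, and the printed shape is only indicative):

from utils.models import SBert

model = SBert('paraphrase-multilingual-MiniLM-L12-v2')
v1 = model('床单')       # first call encodes the string and caches the tensor
v2 = model('床单')       # an identical call is answered from the lru_cache
assert v1 is v2          # same cached torch.Tensor object
print(v1.shape)          # e.g. torch.Size([384]) for this MiniLM model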
utils/pipeline.py
CHANGED
@@ -9,22 +9,37 @@ def p0_originality(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
     assert 'response' in df.columns
     model = SBert(model_name)

-    def get_cos_sim(
+    def get_cos_sim(prompt: str, response: str) -> float:
         prompt_vec = model(prompt)
         response_vec = model(response)
         score = cos_sim(prompt_vec, response_vec).item()
         return score

-    df['originality'] = df.apply(lambda x: 1 - get_cos_sim(
+    df['originality'] = df.apply(lambda x: 1 - get_cos_sim(x['prompt'], x['response']), axis=1)
     return df


 def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
-
+    assert 'prompt' in df.columns
+    assert 'response' in df.columns
     assert 'id' in df.columns
+    model = SBert(model_name)
+
+    def get_cos_sim(responses: list[str]) -> float:
+        responses_vec = [model(_) for _ in responses]
+        count = 0
+        score = 0
+        for i in range(len(responses_vec)):
+            for j in range(1, len(responses_vec)):
+                if i == j:
+                    continue
+                score += cos_sim(responses_vec[i], responses_vec[j]).item()
+                count += 1
+        return score / count
+
     df_out = df.groupby(by=['id', 'prompt']) \
-        .agg({'id': 'first', 'prompt': 'first', '
-        .rename(columns={'
+        .agg({'id': 'first', 'prompt': 'first', 'response': get_cos_sim}) \
+        .rename(columns={'response': 'flexibility'}) \
         .reset_index(drop=True)
     return df_out

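An end-to-end sketch of the new flexibility path on the example data (illustrative; assumes the model name is accepted by SBert and that the repo root is the working directory):

import pandas as pd
from utils.pipeline import p1_flexibility

df = pd.read_csv('data/example.csv')
out = p1_flexibility(df, 'paraphrase-multilingual-MiniLM-L12-v2')
print(out)
# one row per (id, prompt) group; 'flexibility' holds an average of pairwise
# cosine similarities between that group's responses, as computed above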