File size: 1,217 Bytes
d654474 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import pandas as pd
from sentence_transformers.util import cos_sim
from utils.models import SBert
def p0_originality(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Score each row's response by how dissimilar it is to its prompt.

    For every row, the prompt and the response are each passed through an
    ``SBert(model_name)`` instance, and the row's originality is
    ``1 - cos_sim(prompt_vec, response_vec)``.

    Args:
        df: Frame with at least the columns ``'prompt'`` and ``'response'``.
            It is modified in place: an ``'originality'`` column is added
            (or overwritten).
        model_name: Name handed to ``SBert`` to select the model.

    Returns:
        The same ``df`` object, now carrying the ``'originality'`` column.

    Raises:
        AssertionError: If ``'prompt'`` or ``'response'`` is missing.
    """
    # Raised explicitly (rather than via ``assert``) so the check survives
    # ``python -O`` while callers still see the same exception type.
    for column in ('prompt', 'response'):
        if column not in df.columns:
            raise AssertionError(f"missing required column: {column!r}")

    model = SBert(model_name)

    def get_cos_sim(prompt: str, response: str) -> float:
        # NOTE(review): assumes model(text) yields one vector per text so
        # that cos_sim returns a single-element result for .item() — confirm.
        prompt_vec = model(prompt)
        response_vec = model(response)
        return cos_sim(prompt_vec, response_vec).item()

    # Iterate the two columns directly instead of df.apply(axis=1): on an
    # empty frame apply() hands back a DataFrame, which cannot be assigned
    # to a single column.
    scores = [
        1 - get_cos_sim(prompt, response)
        for prompt, response in zip(df['prompt'], df['response'])
    ]
    # Explicit float dtype keeps the column numeric even when there are no rows.
    df['originality'] = pd.Series(scores, index=df.index, dtype=float)
    return df
def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Average the originality of all responses sharing an id and prompt.

    Originality is computed per row by ``p0_originality`` and then averaged
    within each ``('id', 'prompt')`` group; the averaged column is named
    ``'flexibility'``.

    Args:
        df: Frame with at least the columns ``'id'``, ``'prompt'`` and
            ``'response'``. It is passed to ``p0_originality``, which in
            this module writes an ``'originality'`` column into it.
        model_name: Model name forwarded to ``p0_originality``.

    Returns:
        A new frame with the columns ``'id'``, ``'prompt'`` and
        ``'flexibility'``, one row per group, with a fresh integer index.

    Raises:
        AssertionError: If ``'id'`` is missing (checked here) or a column
            required by ``p0_originality`` is missing.
    """
    # Validate before scoring: the scoring pass loads a model and touches
    # ``df``, so a missing 'id' should fail first and leave ``df`` untouched.
    # Raised explicitly so the check is not stripped under ``python -O``.
    if 'id' not in df.columns:
        raise AssertionError("missing required column: 'id'")

    scored = p0_originality(df, model_name)

    # as_index=False keeps the group keys as ordinary columns, so there is
    # no need to aggregate them with 'first' and then drop the index.
    df_out = (
        scored.groupby(['id', 'prompt'], as_index=False)['originality']
        .mean()
        .rename(columns={'originality': 'flexibility'})
    )
    return df_out
if __name__ == '__main__':
    # Manual smoke run: score the bundled example file with both metrics.
    _model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    _df_input = pd.read_csv('data/example_3.csv')

    # Per-row originality scores.
    _df_0 = p0_originality(_df_input, _model_name)
    # Per-(id, prompt) flexibility; this runs the originality pass again
    # on the same input frame.
    _df_1 = p1_flexibility(_df_input, _model_name)
|