Spaces:

tinystyler
/

tinystyler_demo

Running

App Files Community

AjayP13 committed on Jun 5, 2024

Commit

fbfca4a

verified ·

1 Parent(s): 37aa083

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -1

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import torch
 import numpy as np
 from torch.nn.utils.rnn import pad_sequence
 import gradio as gr
 from transformers import AutoModel, AutoModelForSeq2SeqLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 from time import time
 # Load the model and tokenizer
@@ -16,7 +19,7 @@ embedding_model = SentenceTransformer('AnnaWegmann/Style-Embedding', device='cpu
 luar_model = AutoModel.from_pretrained("rrivera1849/LUAR-MUD", revision="51b0d9ecec5336314e02f191dd8ca4acc0652fe1", trust_remote_code=True).half()
 luar_model.to(device)
 luar_tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD", revision="51b0d9ecec5336314e02f191dd8ca4acc0652fe1", trust_remote_code=True)
 def get_target_style_embeddings(target_texts_batch):
     all_target_texts = [target_text for target_texts in target_texts_batch for target_text in target_texts]
@@ -43,6 +46,19 @@ def get_luar_embeddings(texts_batch):
     attention_mask = torch.stack(padded_attention_mask)
     return luar_model(input_ids=input_ids, attention_mask=attention_mask).float().cpu().numpy()
 def run_tinystyler_batch(source_texts, target_texts_batch, reranking, temperature, top_p):
     inputs = tokenizer(source_texts, return_tensors="pt").to(device)
     target_style_embeddings = get_target_style_embeddings(target_texts_batch)
@@ -50,6 +66,8 @@ def run_tinystyler_batch(source_texts, target_texts_batch, reranking, temperatur
     print("Log 0", time(), source_style_luar_embeddings.shape)
     target_style_luar_embeddings = get_luar_embeddings(target_texts_batch)
     print("Log 1", time(), target_style_luar_embeddings.shape)
     # Generate the output with specified temperature and top_p
@@ -67,6 +85,7 @@ def run_tinystyler_batch(source_texts, target_texts_batch, reranking, temperatur
     # Evaluate candidates
     candidates_luar_embeddings = [get_luar_embeddings([[candidates[i]] for candidates in generated_texts]) for i in range(reranking)]
     print("Log 3", time(), len(candidates_luar_embeddings), len(candidates_luar_embeddings[0]))
     # Get best based on re-ranking

+import itertools
 import torch
+from statistics import mean
 import numpy as np
 from torch.nn.utils.rnn import pad_sequence
 import gradio as gr
 from transformers import AutoModel, AutoModelForSeq2SeqLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
+from mutual_implication_score import MIS
 from time import time
 # Load the model and tokenizer
 # LUAR model loaded at a fixed revision with trust_remote_code=True, cast to
 # half precision, then moved to `device` (defined outside this excerpt).
 luar_model = AutoModel.from_pretrained("rrivera1849/LUAR-MUD", revision="51b0d9ecec5336314e02f191dd8ca4acc0652fe1", trust_remote_code=True).half()
 luar_model.to(device)
 # Tokenizer loaded from the same repository and the same revision as the model.
 luar_tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-MUD", revision="51b0d9ecec5336314e02f191dd8ca4acc0652fe1", trust_remote_code=True)
 # MIS scorer from the mutual_implication_score package, built with the same `device`.
+mis_model = MIS(device=device)
 def get_target_style_embeddings(target_texts_batch):
     all_target_texts = [target_text for target_texts in target_texts_batch for target_text in target_texts]
     attention_mask = torch.stack(padded_attention_mask)
     return luar_model(input_ids=input_ids, attention_mask=attention_mask).float().cpu().numpy()
+def compute_mis(texts, target_texts_batch):
+    a_texts = list(itertools.chain.from_iterable([[st] * len(target_texts) for st, target_texts in zip(source_texts, target_texts_batch)]))
+    b_texts = list(itertools.chain.from_iterable(target_texts_batch))
+    scores = mis.compute(a_texts, b_texts, batch_size=len(a_texts))
+    for idx, (score, a_text, b_text) in enumerate(zip(scores, a_texts, b_texts)):
+        if a_text == b_text:
+            scores[idx] = 1.0
+    final_scores = []
+    current_idx = 0
+    for target_texts in target_texts_batch:
+        final_scores.append(mean(scores[idx:idx+len(target_texts)]))
+    return final_scores
 def run_tinystyler_batch(source_texts, target_texts_batch, reranking, temperature, top_p):
     inputs = tokenizer(source_texts, return_tensors="pt").to(device)
     target_style_embeddings = get_target_style_embeddings(target_texts_batch)
     print("Log 0", time(), source_style_luar_embeddings.shape)
     target_style_luar_embeddings = get_luar_embeddings(target_texts_batch)
     print("Log 1", time(), target_style_luar_embeddings.shape)
+    baseline_sim = compute_mis(source_texts, target_texts_batch)
+    print("Log 1.5", time(), len(baseline_sim))
     # Generate the output with specified temperature and top_p
     # Evaluate candidates
     candidates_luar_embeddings = [get_luar_embeddings([[candidates[i]] for candidates in generated_texts]) for i in range(reranking)]
+    candidates_sim = [compute_mis([candidates[i] for candidates in generated_texts], target_texts_batch) for i in range(reranking)]
     print("Log 3", time(), len(candidates_luar_embeddings), len(candidates_luar_embeddings[0]))
     # Get best based on re-ranking