thak123 committed on
Commit
42c019f
1 Parent(s): 385285d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -81
app.py CHANGED
@@ -2,28 +2,28 @@ import datasets
2
  import numpy as np
3
  import torch
4
  import transformers
5
- from config import epochs, batch_size, learning_rate
6
  from model import tokenizer, multitask_model
7
  from mtm import MultitaskTrainer, NLPDataCollator, DataLoaderWithTaskname
8
  import pandas as pd
9
  from datasets import Dataset, DatasetDict
10
  from data_predict import convert_to_stsb_features,convert_to_features
11
-
12
  from huggingface_hub import hf_hub_download,snapshot_download
13
 
 
14
 
15
- # features_dict = {}
16
- # extra_feature_dict = {}
17
- # sentinews_location = ""
 
18
 
19
- # df_document_croatian_test = pd.read_csv(sentinews_location+"textlabel.tsv", sep="\t")
20
- # df_document_croatian_test = df_document_croatian_test[["content"]]
21
- def predict():
22
  # gather everyone if you want to have a single DatasetDict
23
  document = DatasetDict({
24
  # "train": Dataset.from_pandas(df_document_sl_hr_train),
25
  # "valid": Dataset.from_pandas(df_document_sl_hr_valid),
26
- "test": Dataset.from_dict({"content":["Volim ti"]})
27
  })
28
 
29
  dataset_dict = {
@@ -45,81 +45,33 @@ def predict():
45
  features_dict = convert_to_features(dataset_dict, convert_func_dict)
46
 
47
  return features_dict
48
-
49
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
-
51
- #model_link = snapshot_download(repo_id="FFZG-cleopatra/Croatian-News-Classifier")
52
- model_link = hf_hub_download(repo_id="FFZG-cleopatra/Croatian-News-Classifier",filename = "pytorch_model.bin")
53
-
54
- # multitask_model.from_pretrained(, config="/media/gaurish/angela/projects/CroatianSlovenEnglishBert/i-got-u-brother-cleopatra-workshop/src/models/multitask_model_3ep/config.json")
55
- multitask_model.load_state_dict(torch.load(model_link, map_location=device))
56
-
57
- # multitask_model.to(device)
58
- predictions = []
59
- features_dict = predict()
60
- for _, batch in enumerate(features_dict["document"]['test']):
61
- for key, value in batch.items():
62
- batch[key] = batch[key].to(device)
63
-
64
- task_model = multitask_model.get_model("document")
65
- classifier_output = task_model.forward(
66
- torch.unsqueeze(batch["input_ids"], 0),
67
- torch.unsqueeze(batch["attention_mask"], 0),)
68
 
69
- print(tokenizer.decode(batch["input_ids"],skip_special_tokens=True))
70
- prediction =torch.max(classifier_output.logits, axis=1)
71
- predictions.append(prediction.indices.item())
72
 
73
- print("p:", predictions)
74
- # pd.DataFrame({"original_predictions":predictions}).to_csv("eacl_slavic.tsv")
75
 
76
-
77
- trainer = MultitaskTrainer(
78
- model=multitask_model,
79
- args=transformers.TrainingArguments(
80
- learning_rate=learning_rate,
81
- output_dir="/tmp",
82
- do_train=False,
83
- do_eval=True,
84
- # evaluation_strategy ="steps",
85
- # num_train_epochs=epochs,
86
- # fp16=True,
87
- # Adjust batch size if this doesn't fit on the Colab GPU
88
- per_device_train_batch_size=batch_size,
89
- per_device_eval_batch_size=batch_size,
90
- save_steps=3000,
91
- # eval_steps=50,
92
- load_best_model_at_end=True,
93
-
94
- ),
95
- data_collator=NLPDataCollator(tokenizer=tokenizer),
96
- callbacks=[],
97
-
98
  )
99
- print(features_dict["document"]["test"])
100
- tests_dict = {}
101
- for task_name in ["document"]: # "paragraph", "sentence"
102
- test_dataloader = DataLoaderWithTaskname(
103
- task_name,
104
- trainer.get_eval_dataloader(features_dict[task_name]["test"])
105
- )
106
- print(len(trainer.get_eval_dataloader(features_dict[task_name]["test"])))
107
- print(test_dataloader.data_loader.collate_fn)
108
- print(len(test_dataloader.data_loader))
109
- tests_dict[task_name] = trainer.prediction_loop(
110
- test_dataloader,
111
- description=f"Testing: {task_name}"
112
- )
113
- print(tests_dict)
114
- for task_name in ["document", ]: #"paragraph","sentence"
115
- for metric in ["precision", "recall", "f1"]:
116
- print("test {} {}:".format(metric, task_name),
117
- datasets.load_metric(metric,
118
- name="dev {} {}".format(metric, task_name)).compute(
119
- predictions=np.argmax(
120
- tests_dict[task_name].predictions, axis=1),
121
- references=tests_dict[task_name].label_ids, average="macro"
122
- ))
123
- print()
124
 
125
 
 
 
 
2
  import numpy as np
3
  import torch
4
  import transformers
5
+ from config import epochs, batch_size, learning_rate, id2label
6
  from model import tokenizer, multitask_model
7
  from mtm import MultitaskTrainer, NLPDataCollator, DataLoaderWithTaskname
8
  import pandas as pd
9
  from datasets import Dataset, DatasetDict
10
  from data_predict import convert_to_stsb_features,convert_to_features
11
+ import gradio as gr
12
  from huggingface_hub import hf_hub_download,snapshot_download
13
 
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
 
16
+ model_link = hf_hub_download(repo_id="FFZG-cleopatra/Croatian-News-Classifier",filename = "pytorch_model.bin")
17
+
18
+ multitask_model.load_state_dict(torch.load(model_link, map_location=device))
19
+ multitask_model.to(device)
20
 
21
+ def predict(sentence = "Volim ti"):
 
 
22
  # gather everyone if you want to have a single DatasetDict
23
  document = DatasetDict({
24
  # "train": Dataset.from_pandas(df_document_sl_hr_train),
25
  # "valid": Dataset.from_pandas(df_document_sl_hr_valid),
26
+ "test": Dataset.from_dict({"content":[sentence]})
27
  })
28
 
29
  dataset_dict = {
 
45
  features_dict = convert_to_features(dataset_dict, convert_func_dict)
46
 
47
  return features_dict
48
+ predictions = []
49
+ features_dict = predict()
50
+ for _, batch in enumerate(features_dict["document"]['test']):
51
+ for key, value in batch.items():
52
+ batch[key] = batch[key].to(device)
53
+
54
+ task_model = multitask_model.get_model("document")
55
+ classifier_output = task_model.forward(
56
+ torch.unsqueeze(batch["input_ids"], 0),
57
+ torch.unsqueeze(batch["attention_mask"], 0),)
58
+
59
+ print(tokenizer.decode(batch["input_ids"],skip_special_tokens=True))
60
+ prediction =torch.max(classifier_output.logits, axis=1)
61
+ predictions.append(prediction.indices.item())
 
 
 
 
 
 
62
 
63
+ print("p:", predictions[0] , id2label[predictions[0]] )
64
+ return id2label[predictions[0]]
 
65
 
 
 
66
 
67
+ interface = gr.Interface(
68
+ fn=get_sentiment,
69
+ inputs='text',
70
+ outputs=['text', 'label'],
71
+ title='Sentiment Analysis',
72
+ description='Get the positive/neutral/negative sentiment for the given input.'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
+ interface.launch(inline = False)
77
+