Add description

- app.py +5 -0
- description.md +29 -0
- preamble.md +3 -0
- requirements.txt +1 -1
app.py
CHANGED
@@ -59,7 +59,10 @@ def generate_diff(text_a: str, text_b: str, method: str):
 my_pipeline = None
 tokenizer = Whitespace()
 
+
 with gr.Blocks() as demo:
+    preamble = (Path(__file__).parent / "preamble.md").read_text()
+    gr.Markdown(preamble)
     with gr.Row():
         text_a = gr.Textbox(label="Text A", value="Chinese shares close higher Friday.", lines=2)
         text_b = gr.Textbox(label="Text B", value="Les actions chinoises clôturent en baisse mercredi.", lines=2)
@@ -77,6 +80,8 @@
         inputs=[text_a, text_b, method],
         outputs=[output_a, output_b],
     )
+    description = (Path(__file__).parent / "description.md").read_text()
+    gr.Markdown(description)
 
 
 if my_pipeline is None:
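The pattern this change applies is simple: the two Markdown files live next to `app.py` and are rendered with `gr.Markdown` at the top and bottom of the `gr.Blocks` layout. Below is a minimal, self-contained sketch of that pattern, not the Space's actual `app.py`: the `generate_diff` body here is a placeholder, and the sketch assumes `preamble.md` and `description.md` exist next to the script.

```python
# Minimal standalone sketch of the layout pattern (not the Space's full app.py).
from pathlib import Path

import gradio as gr

here = Path(__file__).parent


def generate_diff(text_a: str, text_b: str) -> str:
    # Placeholder for the Space's actual diffing logic.
    return f"A: {text_a}\nB: {text_b}"


with gr.Blocks() as demo:
    # Render the preamble Markdown above the widgets ...
    gr.Markdown((here / "preamble.md").read_text())
    with gr.Row():
        text_a = gr.Textbox(label="Text A", lines=2)
        text_b = gr.Textbox(label="Text B", lines=2)
    output = gr.Textbox(label="Output")
    gr.Button("Compare").click(generate_diff, inputs=[text_a, text_b], outputs=output)
    # ... and the "small print" description below them.
    gr.Markdown((here / "description.md").read_text())

if __name__ == "__main__":
    demo.launch()
```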
description.md
ADDED
@@ -0,0 +1,29 @@
+## Small print
+
+<p style="background-color: #fff9f9; border: 1px solid #ff0000; padding: 10px;">
+Warning: This demo is highly experimental and not ready for production use.
+</p>
+
+This demo is a proof of concept for visualizing the semantic differences between two text documents.
+The input documents may or may not be written in the same language.
+
+In our paper, we evaluate three simple, unsupervised approaches based on BERT-like encoder models.
+This demo implements the approaches `DiffAlign` and `DiffDel` using the model [ZurichNLP/unsup-simcse-xlm-roberta-base](https://huggingface.co/ZurichNLP/unsup-simcse-xlm-roberta-base). See the [XLM-R model](https://huggingface.co/xlm-roberta-base) for a list of supported languages.
+
+The third approach, `DiffMask`, was not included in the demo because it is very slow.
+
+More resources:
+- Paper: https://arxiv.org/abs/2305.13303
+- Code: https://github.com/ZurichNLP/recognizing-semantic-differences
+
+## Citation
+```bibtex
+@article{vamvas-sennrich-2023-rsd,
+    title={Towards Unsupervised Recognition of Semantic Differences in Related Documents},
+    author={Jannis Vamvas and Rico Sennrich},
+    year={2023},
+    eprint={2305.13303},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
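To give a feel for what an alignment-based approach like `DiffAlign` does with this encoder, here is a rough, hypothetical sketch of a token-level difference score: each subword token of one text is scored by one minus its best cosine similarity to any token of the other text, so tokens without a good counterpart stand out. This is only an illustration of the general idea described above, not the paper's implementation; the actual scoring in the paper and the linked repository may differ.

```python
# Rough sketch of an alignment-based difference score in the spirit of DiffAlign
# (hypothetical simplification; the exact method in the paper/repo may differ).
import torch
from transformers import AutoTokenizer, AutoModel

model_name = "ZurichNLP/unsup-simcse-xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()


@torch.no_grad()
def token_difference_scores(text_a: str, text_b: str):
    """Score each subword token of text_a by how poorly it aligns to text_b."""
    enc_a = tokenizer(text_a, return_tensors="pt")
    enc_b = tokenizer(text_b, return_tensors="pt")
    emb_a = model(**enc_a).last_hidden_state[0]  # (len_a, hidden)
    emb_b = model(**enc_b).last_hidden_state[0]  # (len_b, hidden)
    # Cosine similarity between every pair of tokens.
    emb_a = torch.nn.functional.normalize(emb_a, dim=-1)
    emb_b = torch.nn.functional.normalize(emb_b, dim=-1)
    sim = emb_a @ emb_b.T  # (len_a, len_b)
    # A token with no good counterpart in text_b gets a high difference score.
    scores = 1 - sim.max(dim=1).values
    tokens = tokenizer.convert_ids_to_tokens(enc_a["input_ids"][0].tolist())
    return list(zip(tokens, scores.tolist()))


for token, score in token_difference_scores(
    "Chinese shares close higher Friday.",
    "Les actions chinoises clôturent en baisse mercredi.",
):
    print(f"{token}\t{score:.2f}")
```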
preamble.md
ADDED
@@ -0,0 +1,3 @@
+# Diff tool for natural language text
+
+Demo for the EMNLP 2023 paper ["Towards Unsupervised Recognition of Semantic Differences in Related Documents"](https://arxiv.org/abs/2305.13303).
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
 transformers==4.34.0
 --find-links https://download.pytorch.org/whl/cpu
-torch
+torch==2.0.1