Spaces:
Runtime error
Runtime error
File size: 4,458 Bytes
c085a6a 95ba32b 3087373 95ba32b 4a3a4a3 95ba32b 3087373 95ba32b 4a3a4a3 95ba32b 3087373 95ba32b 60d2d8a 95ba32b 60d2d8a 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b f0296d1 95ba32b 3087373 95ba32b 4a3a4a3 3087373 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import os
import streamlit as st
import difflib
import spacy
import streamlit_analytics
from utils import add_logo_to_sidebar, add_footer
# ``st.cache`` is the legacy caching API; current Streamlit provides
# ``st.cache_resource`` for global, unserialisable resources such as ML models.
# Prefer it when present and fall back to the legacy decorator so the app
# still runs on older Streamlit versions.
if hasattr(st, 'cache_resource'):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Load the spaCy ``en_core_web_md`` pipeline.

    The function is wrapped in a Streamlit cache decorator so the pipeline
    is loaded once instead of on every script rerun.

    Returns:
        The object returned by ``spacy.load('en_core_web_md')``.
    """
    return spacy.load('en_core_web_md')
# Usage tracking is opened here and closed by the matching stop_tracking()
# call at the end of the script.
streamlit_analytics.start_tracking()

# --- Page chrome: browser tab, sidebar and header ---------------------------
# NOTE(review): the 'Get Help' address looks redacted by whatever captured
# this file - confirm the real mailto target.
_menu_items = {
    'Get Help': 'mailto:[email protected]',
    'Report a bug': None,
    'About': "## This a demo showcasing different Legal AI Actions",
}
st.set_page_config(
    page_title="Compare Demo",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_menu_items,
)

add_logo_to_sidebar()

st.title('π Compare Demo')
st.write("""This demo shows how AI can be used to compare passages of text.""")

# Show a spinner while the (cached) spaCy pipeline is being fetched.
with st.spinner('βοΈ Loading model...'):
    nlp = load_model()
# Default passages pre-filled into the two text boxes.
EXAMPLE_TEXT_1 = """This Agreement shall be governed by and interpreted under the laws of the
State of Delaware without regard to its conflicts of law provisions."""
EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""

# Two side-by-side input areas; the visible heading is rendered as markdown,
# so the widget's own label is collapsed.
first_col, second_col = st.columns(2)

with first_col:
    st.markdown('### π Enter a passage of text')
    text_1 = st.text_area(
        'Enter a passage of text',
        value=EXAMPLE_TEXT_1,
        height=100,
        key='input1',
        label_visibility='collapsed',
    )

with second_col:
    st.markdown('### π Enter a second passage of text')
    text_2 = st.text_area(
        'Enter a second passage of text',
        value=EXAMPLE_TEXT_2,
        height=100,
        key='input2',
        label_visibility='collapsed',
    )

# Full-width primary button; truthy only on the rerun triggered by a click.
button = st.button('Compare', type='primary', use_container_width=True)
def get_tokens(doc):
    """Collect the ``lower`` attribute of every token in *doc*, in order.

    NOTE(review): for spaCy tokens ``lower`` is presumably the integer hash
    of the lowercase form (``lower_`` is the string) - confirm. Either way
    the values are only compared for equality by the sequence matcher.

    Args:
        doc: Iterable of token-like objects exposing a ``lower`` attribute.

    Returns:
        A list with one ``lower`` value per token.
    """
    lowered = []
    for tok in doc:
        lowered.append(tok.lower)
    return lowered
def get_pos_tags(doc):
    """Collect the ``pos_`` attribute of every token in *doc*, in order.

    Args:
        doc: Iterable of token-like objects exposing a ``pos_`` attribute.

    Returns:
        A list with one ``pos_`` value per token.
    """
    tags = []
    for tok in doc:
        tags.append(tok.pos_)
    return tags
def add_md_color(text, match):
    """Wrap *text* in a ``:color[...]`` markdown colour directive.

    Args:
        text: The text to colour.
        match: Truthy selects ``green``; falsy selects ``red``.

    Returns:
        The string ``":green[text]"`` or ``":red[text]"``.
    """
    if match:
        color = 'green'
    else:
        color = 'red'
    return ":{}[{}]".format(color, text)
def add_em(text, match):
    """Emphasise *text* with markdown asterisks.

    Args:
        text: The text to emphasise.
        match: Truthy wraps in ``**`` (bold); falsy wraps in ``*`` (italic).

    Returns:
        The wrapped string.
    """
    marker = "**" if match else "*"
    return f"{marker}{text}{marker}"
def create_str_output(doc, idxs):
    """Render *doc* as one markdown string with every token colour-coded.

    Args:
        doc: Iterable of tokens exposing ``text`` and ``i`` (token position).
        idxs: Collection of ``(start, end)`` half-open position ranges that
            count as matched.

    Returns:
        The coloured tokens joined by single spaces. Tokens whose position
        falls inside any range are marked as matches, all others as
        non-matches (colouring is delegated to ``add_md_color``).
    """
    # Flatten the ranges once so each token needs only a set lookup.
    matched_positions = set()
    for start, end in idxs:
        matched_positions.update(range(start, end))

    # Highlight word diff.
    pieces = [
        add_md_color(token.text, match=token.i in matched_positions)
        for token in doc
    ]
    return ' '.join(pieces)
def get_matching_idxs(items_1, items_2):
    """Find the index ranges where two sequences match.

    Args:
        items_1: First sequence of hashable items.
        items_2: Second sequence of hashable items.

    Returns:
        A pair of lists ``(ranges_1, ranges_2)``. Each list holds one
        half-open ``(start, end)`` range per matching block reported by
        ``difflib.SequenceMatcher``, indexed into ``items_1`` and
        ``items_2`` respectively. The matcher's final zero-length sentinel
        block is included, so the last range in each list is empty.
    """
    blocks = difflib.SequenceMatcher(None, items_1, items_2).get_matching_blocks()
    ranges_1 = [(blk.a, blk.a + blk.size) for blk in blocks]
    ranges_2 = [(blk.b, blk.b + blk.size) for blk in blocks]
    return ranges_1, ranges_2
# Run the comparison only on the rerun triggered by the 'Compare' button.
if button:
    if not text_1.strip() or not text_2.strip():
        # A blank passage has no words to compare, so the metrics below would
        # not be meaningful (two empty token lists score 100% textually).
        st.warning('Please enter text in both boxes before comparing.')
    else:
        with st.spinner('βοΈ Comparing Texts...'):
            doc_1 = nlp(text_1)
            doc_2 = nlp(text_2)

        # Extract the token sequences once; they feed both the highlighting
        # and the textual-similarity metric.
        tokens_1 = get_tokens(doc_1)
        tokens_2 = get_tokens(doc_2)

        st.header('π§ͺ Comparison')
        st.markdown('We can highlight the :green[similarities] and :red[differences] in **wording** across the two texts.')
        doc_1_token_idxs, doc_2_token_idxs = get_matching_idxs(tokens_1, tokens_2)

        # Side-by-side colour-coded rendering of both passages.
        col1, col2 = st.columns(2)
        with col1:
            st.markdown(create_str_output(doc_1, doc_1_token_idxs))
        with col2:
            st.markdown(create_str_output(doc_2, doc_2_token_idxs))

        # Three similarity metrics, one per column.
        col1, col2, col3 = st.columns(3)
        with col1:
            # perform simple sequence matching
            sm = difflib.SequenceMatcher(None, tokens_1, tokens_2)
            st.subheader('π Textual Similarity')
            st.markdown('We can measure the similarity based on the *wording* of the two texts.')
            st.metric(label='Textual Similarity', value=f"{sm.ratio() * 100:.1f}%")
        with col2:
            st.subheader('π Linguistic Similarity')
            st.markdown('We can measure the similarity based on the *linguistic features* of the two texts.')
            # Same sequence matching, applied to part-of-speech tags.
            sm = difflib.SequenceMatcher(None, get_pos_tags(doc_1), get_pos_tags(doc_2))
            st.metric(label='Linguistic Similarity', value=f"{sm.ratio() * 100:.1f}%")
        with col3:
            st.subheader('π Semantic Similarity')
            st.markdown('We can measure the similarity based on the *meaning* of the two texts.')
            st.metric(label='Semantic Similarity', value=f"{doc_1.similarity(doc_2) * 100:.1f}%")
add_footer()

# Close the tracking session opened by start_tracking() near the top of the
# script. The password is read with a plain subscript, so a missing
# ANALYTICS_PASSWORD environment variable raises KeyError here; it is not
# silently replaced by an empty password.
streamlit_analytics.stop_tracking(unsafe_password=os.environ["ANALYTICS_PASSWORD"])