File size: 2,578 Bytes
15adcdf
bfa4c9d
58a883e
 
bfa4c9d
 
58a883e
bfa4c9d
58a883e
bfa4c9d
 
 
 
 
 
eb05d29
bec4c85
15adcdf
 
58a883e
 
15adcdf
bfa4c9d
 
 
18026b7
 
bfa4c9d
18026b7
 
 
 
 
 
 
 
 
bfa4c9d
18026b7
bfa4c9d
 
18026b7
bfa4c9d
 
 
 
18026b7
bfa4c9d
 
18026b7
 
bfa4c9d
 
 
 
18026b7
 
bfa4c9d
 
 
 
 
 
18026b7
eb05d29
 
 
15adcdf
18026b7
bfa4c9d
 
 
18026b7
 
bfa4c9d
eb05d29
bfa4c9d
eb05d29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

# import streamlit as st
# from transformers import pipeline

# # Load the model from the Hugging Face Hub
# ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")

# # Example predictions
# text = st.text_area('enter text: ')
# results = ner_pipeline(text)

# for entity in results:
#     print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
#     st.json(entity)


import streamlit as st
from transformers import pipeline

# Load the model from the Hugging Face Hub.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built inside a cached factory: it is created once per server
# process and the same object is handed back on later reruns.
@st.cache_resource
def _load_ner_pipeline():
    """Build the token-classification pipeline for the fine-tuned NER model."""
    return pipeline("ner", model="Beehzod/smart-finetuned-ner")


ner_pipeline = _load_ner_pipeline()

def _entity_type(label):
    """Return ``label`` without a leading ``B-`` or ``I-`` marker."""
    if label.startswith(("B-", "I-")):
        return label[2:]
    return label


def _finalize(entity, scores):
    """Store the mean of ``scores`` on ``entity`` and return it."""
    # float() normalises the mean to a built-in float whatever numeric type
    # the token scores arrived in.
    entity["score"] = float(sum(scores) / len(scores))
    return entity


def merge_entities(entities):
    """Combine token-level NER predictions into whole entities.

    Args:
        entities: Iterable of dicts, each with the keys ``'word'``,
            ``'entity'``, ``'score'``, ``'start'`` and ``'end'``. The input
            dicts are not modified.

    Returns:
        A list of new dicts with the same five keys, one per merged entity:
        ``'word'`` is the joined text, ``'entity'`` is the label of the
        entity's first token, ``'score'`` is the mean of the merged token
        scores, and ``'start'``/``'end'`` span from the first token's start
        to the last token's end. An empty input yields an empty list.

    A token is folded into the entity being built when either
      * it is a ``##`` sub-word piece that begins exactly where that entity
        ends, or
      * its label is not a ``B-`` label and names the same entity type
        (``B-``/``I-`` marker ignored) as that entity.
    Any other token starts a new entity.
    """
    merged_entities = []
    current_entity = None
    token_scores = []  # scores of the tokens folded into current_entity

    for token in entities:
        label = token['entity']
        is_subword = token['word'].startswith("##")
        token_text = token['word'].replace("##", "")  # Remove subword prefix if any

        if current_entity is None:
            # Nothing to extend yet; this also covers a first token that
            # carries an I- label instead of a B- label.
            continues = False
        elif is_subword and token['start'] == current_entity['end']:
            # A sub-word piece glued to the current word belongs to it even
            # if the model tagged the piece with its own B- label.
            continues = True
        else:
            # Compare entity types, not full labels, so that "B-PER"
            # followed by "I-PER" is recognised as one entity.
            continues = (
                not label.startswith('B-')
                and _entity_type(label) == _entity_type(current_entity['entity'])
            )

        if continues:
            # Sub-word pieces attach directly; whole words get a separator.
            current_entity['word'] += token_text if is_subword else " " + token_text
            current_entity['end'] = token['end']
            token_scores.append(token['score'])
            continue

        # Close the previous entity (if any) before starting a new one.
        if current_entity is not None:
            merged_entities.append(_finalize(current_entity, token_scores))

        current_entity = {
            "word": token_text,
            "entity": label,
            "score": token['score'],
            "start": token['start'],
            "end": token['end'],
        }
        token_scores = [token['score']]

    # Add the last entity if it exists
    if current_entity is not None:
        merged_entities.append(_finalize(current_entity, token_scores))

    return merged_entities

# Streamlit page: title, a text box, and one result entry per merged entity.
st.title('Named Entity Recognition (NER) with Streamlit')
text = st.text_area('Enter text for NER analysis:')

# Nothing is analysed while `text` is falsy (i.e. the box is still empty).
if text:
    # Token-level predictions go straight into merge_entities.
    merged_results = merge_entities(ner_pipeline(text))

    for entity in merged_results:
        # Human-readable summary first, then the full entity dict as JSON.
        summary_line = f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}"
        st.write(summary_line)
        st.json(entity)