# Streamlit NER demo — runs a Hugging Face token-classification pipeline
# and displays merged entity predictions.
# import streamlit as st
# from transformers import pipeline
# # Load the model from the Hugging Face Hub
# ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
# # Example predictions
# text = st.text_area('enter text: ')
# results = ner_pipeline(text)
# for entity in results:
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
# st.json(entity)
import streamlit as st
from transformers import pipeline
# Load the model from the Hugging Face Hub.
# Token-level NER pipeline: emits one prediction per (sub)word token with
# 'word', 'entity' (BIO label), 'score', 'start', 'end' keys; subword pieces
# are merged into whole entities by merge_entities() below.
ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
# Helper function to combine subword tokens
def merge_entities(entities):
    """Merge subword tokens and I- continuations into whole-entity spans.

    Fixes two defects in the original implementation:
      * a leading ``I-*`` token (no current entity yet) crashed with a
        ``TypeError`` because the continuation branch dereferenced ``None``;
      * continuation was gated on full-label equality, so ``B-PER`` followed
        by ``I-PER`` never merged — the comparison must strip the B-/I-
        prefix and compare entity *types*.

    Args:
        entities: Token-level predictions from a HF "ner" pipeline; each item
            is a dict with 'word', 'entity' (e.g. 'B-PER'/'I-PER'), 'score',
            'start', and 'end'.

    Returns:
        List of dicts with the same keys, one per merged entity. 'word' is
        the concatenated surface form (subword '##' markers removed),
        'score' the mean of the merged tokens' scores (as a plain float, so
        it is JSON-serializable), and 'entity' the first token's label.
    """
    def entity_type(label):
        # 'B-PER' / 'I-PER' -> 'PER'; labels without a prefix pass through.
        return label.split('-', 1)[-1]

    merged_entities = []
    current_entity = None
    count = 0  # tokens folded into current_entity, for score averaging

    def flush():
        # Finalize the in-progress entity (average its score) and emit it.
        nonlocal current_entity, count
        if current_entity is not None:
            current_entity['score'] /= count
            merged_entities.append(current_entity)
            current_entity = None
            count = 0

    for token in entities:
        token_text = token['word'].replace("##", "")  # strip subword marker
        # A new entity starts on a 'B-' label, when there is nothing to
        # continue, or when the entity type changes (e.g. PER -> LOC).
        starts_new = (
            token['entity'].startswith('B-')
            or current_entity is None
            or entity_type(token['entity']) != entity_type(current_entity['entity'])
        )
        if starts_new:
            flush()
            current_entity = {
                "word": token_text,
                "entity": token['entity'],
                "score": float(token['score']),  # np.float32 -> float
                "start": token['start'],
                "end": token['end'],
            }
            count = 1
        else:
            # Continue the current entity: extend text/span, accumulate score.
            current_entity["word"] += token_text
            current_entity["end"] = token['end']
            current_entity["score"] += float(token['score'])
            count += 1
    flush()  # emit the last entity, if any
    return merged_entities
# Streamlit UI: collect text, run NER, show each merged entity.
st.title('Named Entity Recognition (NER) with Streamlit')
user_text = st.text_area('Enter text for NER analysis:')
if user_text:
    # Token-level predictions -> whole-entity spans, then render each one.
    for entity in merge_entities(ner_pipeline(user_text)):
        st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
        st.json(entity)