# import streamlit as st
# from transformers import pipeline
#
# # Load the model from the Hugging Face Hub
# ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
#
# # Example predictions
# text = st.text_area('enter text: ')
# results = ner_pipeline(text)
# for entity in results:
#     print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
#     st.json(entity)
import streamlit as st
from transformers import pipeline

# Load the model from the Hugging Face Hub
ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
# Helper function to merge subword tokens and I- continuations into whole entities
def merge_entities(entities):
    merged_entities = []
    current_entity = None

    for token in entities:
        token_text = token['word'].replace("##", "")  # Strip the WordPiece subword marker if present
        token_type = token['entity'].split('-')[-1]   # Entity type without the B-/I- prefix

        # Continue the current entity for "##" subword pieces and for non-"B-" tags
        # of the same entity type; otherwise start a new entity
        continues_current = current_entity is not None and (
            token['word'].startswith("##")
            or (not token['entity'].startswith('B-')
                and token_type == current_entity['entity'].split('-')[-1])
        )

        if not continues_current:
            # Add the previous entity if it exists
            if current_entity:
                # Average the score over all tokens in the entity
                current_entity['score'] /= current_entity['count']
                del current_entity['count']
                merged_entities.append(current_entity)

            # Start a new entity
            current_entity = {
                "word": token_text,
                "entity": token['entity'],
                "score": token['score'],
                "start": token['start'],
                "end": token['end'],
                "count": 1  # Helper count for score averaging
            }
        else:
            # Continue the current entity: glue "##" subword pieces directly,
            # join whole words with a space
            if token['word'].startswith("##"):
                current_entity["word"] += token_text
            else:
                current_entity["word"] += " " + token_text
            current_entity["end"] = token['end']
            current_entity["score"] += token['score']
            current_entity["count"] += 1
    # Add the last entity if it exists
    if current_entity:
        current_entity['score'] /= current_entity['count']
        del current_entity['count']
        merged_entities.append(current_entity)

    return merged_entities
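# Illustration (hypothetical token values, not actual model output): if "Tashkent"
# is split into the pieces "Tash" (B-LOC) and "##kent" (I-LOC), merge_entities
# returns a single record roughly like
#   {"word": "Tashkent", "entity": "B-LOC", "score": 0.98, "start": 0, "end": 8}
# with the score averaged over both pieces.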
# Streamlit app to display entities
st.title('Named Entity Recognition (NER) with Streamlit')
text = st.text_area('Enter text for NER analysis:')

# Run NER model and merge results
if text:
    results = ner_pipeline(text)
    merged_results = merge_entities(results)

    # Display results
    for entity in merged_results:
        st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
        st.json(entity)
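# Note: the transformers pipeline can also group subword tokens itself via its
# aggregation_strategy argument; a minimal sketch (not used above) would be:
#
#     ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner",
#                             aggregation_strategy="simple")
#
# in which case each result carries an "entity_group" key instead of "entity"
# and the manual merge_entities step is not needed.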