File size: 8,408 Bytes
deaec20 3404a18 deaec20 d3baca2 deaec20 d3baca2 deaec20 d3baca2 deaec20 3404a18 d3baca2 deaec20 3404a18 deaec20 3404a18 deaec20 3404a18 deaec20 3404a18 deaec20 d3baca2 deaec20 3404a18 deaec20 d3baca2 deaec20 d3baca2 deaec20 3404a18 deaec20 3404a18 deaec20 bf4d19f deaec20 d3baca2 deaec20 d3baca2 deaec20 d3baca2 deaec20 ee6e98a bf4d19f deaec20 efa9e01 d3baca2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# -*- coding: utf-8 -*-
"""disaster_help_ner_production.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1_wqnv01NeDdOLp2X1KT6WfvNgVwwTu5a
Note: This is a continuation of [this](https://colab.research.google.com/drive/1HlZLIVcAtWVeitZjWV3LclrH3gBwuymp?usp=sharing) notebook:
# Imports
"""
import json
import requests
import configparser
import spacy
import spacy_transformers
from spacy import displacy
from spacy.tokens import Span
from transformers import pipeline
from spacy.matcher import PhraseMatcher
import csv
import numpy as np
import pandas as pd
import geopy
import gradio as gr
from geopy.geocoders import Nominatim
"""# Telegram """
# bot_token = 'get from user using gradio'
offset = None
def get_data(bot_token):
    """Fetch new channel-post texts from Telegram's getUpdates API.

    Uses the module-level ``offset`` so each call only returns updates not
    seen by a previous call. On any failure (empty result set, bad token,
    network error) returns a single-element list holding an error message,
    matching the contract the callers check against.
    """
    global offset
    url = "https://api.telegram.org/bot{}/getUpdates".format(bot_token)
    if offset is not None:
        url += "?offset={}".format(offset)
    try:
        response_json = requests.get(url).json()
        last_update_id = int(response_json['result'][-1]['update_id'])
        # Without 'last_update_id + 1' the next poll would return duplicates.
        offset = last_update_id + 1
        return [r['channel_post']['text'] for r in response_json['result']]
    except Exception:
        # Covers KeyError/IndexError from an empty 'result' or a bad token,
        # plus network/JSON errors — all degrade to the same user-facing message.
        return ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
"""# Classifier"""
def classify_message(bot_token):
error_msg = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
disaster_docs = []
classifier = pipeline("sentiment-analysis", model="Madhana/disaster_msges_classifier_v1")
results = []
for data in get_data(bot_token):
if data == error_msg[0]:
return error_msg
classification = classifier(data)
label = classification[0]['label']
results.append((data, label))
if label == 'DISASTER':
disaster_docs.append(data)
return disaster_docs
"""# NER Pipeline"""
@spacy.Language.component("disaster_ner")
def disaster_ner(doc):
matcher = PhraseMatcher(doc.vocab)
patterns = list(nlp.tokenizer.pipe(Tamil_words))
matcher.add("Tamil_words", None, *patterns)
matches = matcher(doc)
spans = [Span(doc, start, end, label="YO!") for match_id, start, end in matches]
doc.ents = spans
return doc
# Phrase list consumed by the disaster_ner component above.
Tamil_words = ['மதனா பாலா'] # umm, that's my name in Tamil, consider this as a easter egg in this app lol.
# Custom trained pipeline package; must be installed in the environment.
nlp = spacy.load("en_pipeline")
# Insert the phrase matcher before the statistical NER so its spans win.
nlp.add_pipe("disaster_ner", name="disaster_ner", before='ner')
def create_address(row):
    """Build a geocodable address string from the STREET/NEIGHBORHOOD/CITY columns.

    Empty components are skipped so partially-extracted rows do not yield
    strings like ", , Chennai" that geocoders cannot resolve. Fully-populated
    rows produce the same "street, neighborhood, city" string as before.
    """
    parts = (row['STREET'], row['NEIGHBORHOOD'], row['CITY'])
    return ", ".join(p for p in parts if p)
geolocator = Nominatim(user_agent="disaster-ner-app")
def geocode_address(address):
    """Geocode an address string to a (latitude, longitude) tuple.

    Returns None when the address cannot be resolved or the geocoding
    service fails; callers store this directly in the 'Coordinates' column.
    """
    try:
        location = geolocator.geocode(address)
        if location is None:
            # Nominatim returns None for unresolvable addresses.
            return None
        return (location.latitude, location.longitude)
    except Exception:
        # Service/network errors (timeouts, rate limits) degrade to "no fix"
        # rather than crashing the whole dataframe build. The original used a
        # bare except:, which also swallowed KeyboardInterrupt/SystemExit.
        return None
"""# With Classifier"""
def get_classifier_ner(bot_token):
data = classify_message(bot_token)
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
df = pd.DataFrame(columns=["Text"] + entity_types)
for text in data:
doc = nlp(text)
row = [text]
entities = {ent.label_: ent.text for ent in doc.ents}
for entity_type in entity_types:
row.append(entities.get(entity_type, ""))
# html = displacy.render(doc, style="ent")
# row.append(html)
num_cols = len(df.columns)
while len(row) < num_cols:
row.append("")
df.loc[len(df)] = row
df['Address'] = df.apply(create_address, axis=1)
df['Coordinates'] = df['Address'].apply(geocode_address)
return df
"""## Without Classifier"""
def get_ner(bot_token):
data = get_data(bot_token)
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
df = pd.DataFrame(columns=["Text"] + entity_types)
for text in data:
doc = nlp(text)
row = [text]
entities = {ent.label_: ent.text for ent in doc.ents}
for entity_type in entity_types:
row.append(entities.get(entity_type, ""))
# html = displacy.render(doc, style="ent")
# row.append(html)
num_cols = len(df.columns)
while len(row) < num_cols:
row.append("")
df.loc[len(df)] = row
df['Address'] = df.apply(create_address, axis=1)
df['Coordinates'] = df['Address'].apply(geocode_address)
return df
"""# Gradio"""
def process_ner_data(your_bot_token):
return get_ner(your_bot_token)
def process_classifier_ner_data(your_bot_token):
    """Gradio callback: classify messages first, then run NER on disaster ones."""
    return get_classifier_ner(your_bot_token)
# Gradio UI: one tab for the token input + results table, one for the guide.
demo = gr.Blocks()
with demo:
    gr.Markdown("Telegram Disaster Recovery Assistant")
    with gr.Tabs():
        with gr.TabItem("Structured Telegram Messages"):
            with gr.Row():
                your_bot_token = gr.Textbox(type='password', label="Enter your Bot Token")
                # NOTE(review): these headers omit the Text/Address/Coordinates
                # columns the callbacks return; Gradio replaces the headers with
                # the DataFrame's own columns on output, so this is cosmetic.
                ner_df = gr.Dataframe(headers=["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"])
            classifier_ner_button = gr.Button("Get Classifier-NER Output")
            ner_button = gr.Button("Get NER Output")
            clear = gr.Button("Clear")
        with gr.TabItem("User Guide"):
            with gr.Row():
                # Bug fix: the original had "\6." inside this regular (non-raw)
                # string, which Python reads as the octal escape chr(6), not the
                # text "6." — step 6 now sits on its own line.
                gr.Markdown("""This is an Telegram based Disaster Recovery Assist app that uses Named Entity Recognition to extract important entities from the unstructured text and stores it in an dataframe.
You need to provide your personal Telegram Bot API token (API token of the bot that is added to the channel as an administrator) to use this app.
**Steps to create a Telegram Bot**:
1. Download the Telegram app on your device or use the web version.
2. Open the app and search for the "BotFather" bot.
3. Start a chat with the BotFather bot by clicking on the "START" button.
4. Type "/newbot" and follow the on-screen instructions to create a new bot.
5. Choose a name and username for your bot.
6. Once your bot is created, the BotFather will give you a unique API token.
**Steps to add your telegram bot to your channel as an administrator**:
1. Create a new channel or choose an existing one that you want to use the bot in.
2. Add your bot to the channel as an administrator. To do this, go to the channel settings, click on "Administrators", and then click on "Add Administrator". Search for your bot and add it to the channel.
3. Now you can send commands to the bot in the channel by mentioning the bot using the "@" symbol followed by the bot's username. For example, "@my_bot help" will send the "help" command to the bot.
**Get Classifier-NER Output VS Get NER Output**:
The 'Get Classifier Ner Output' function first classifies the message as either a disaster message or a random message, and then applies the NER pipeline to the classified output. In contrast, the 'Get NER Output' function applies the NER pipeline directly to the message.
*If you get any errors or dependency issues, feel free to reach out to me!*""")
    # Wire both buttons to the shared results dataframe; Clear empties it.
    ner_button.click(process_ner_data,inputs=your_bot_token, outputs=ner_df)
    classifier_ner_button.click(process_classifier_ner_data,inputs=your_bot_token, outputs=ner_df)
    clear.click(lambda: None, None, ner_df, queue=True)
demo.queue(concurrency_count=3)
demo.launch() # share=True, debug=True
|