File size: 5,873 Bytes
40cca03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import time

# Announce that loading has started before the imports below are executed.
print("\n\n ==== THE NATURAL LANGUAGE MODULE IS BEING LOADED. PLEASE WAIT ==== \n\n")
# Timestamp taken before the remaining imports; it is not read again in the
# visible part of this file (presumably used to report load time -- confirm).
start_time_load = time.time()

# NOTE: `logging` here is the transformers logging helper, not the stdlib
# `logging` module; the name stays bound to it for the rest of this file.
from transformers import logging
# Restrict transformers log output to errors only.
logging.set_verbosity_error()

import warnings
# Suppress every UserWarning raised from this point on, including those
# emitted while the component modules below are imported.
warnings.filterwarnings("ignore", category=UserWarning)

import sys

import requests

# Project-local pipeline components; each one is called with the raw sentence
# in natural_language_module().
from countriesIdentification import identify_locations
from datesIdentification import dates_binding
from magnitudeIdentification import magnitude_binding
from comparativesIdentification import comparatives_binding
# NOTE(review): "earthquaqe" is the module name as imported here; it must match
# the file name on disk, so do not "fix" the spelling in isolation.
from earthquaqeIdentification import identify_earthquake_event


def process_final_dict(final_dictionary):
  """
  Translate component error tuples into the numeric error codes handled by the SF.

  Every tuple entry of ``final_dictionary`` that equals one of the known
  ``(0, COMPONENT, reason)`` markers is replaced, in place, by a freshly built
  dictionary carrying the matching 99999xx code. Entries that are not tuples,
  and tuples that match no known marker, are left untouched.

  Returns the same list object that was passed in.
  """

  def flat(key):
    # builder for components whose error payload is a single key -> code pair
    return lambda code: {key: code}

  def date_payload(code):
    # date errors repeat the code for every date field
    return {"date": {"day": code, "month": code, "year": code}}

  def location_payload(code):
    # location errors carry the code as a one-element list for both fields
    return {"city": [code], "country": [code]}

  payload_builders = {
    "MAGNITUDE": flat("Number"),
    "EARTHQUAKE_EVENT": flat("event"),
    "DATES": date_payload,
    "LOCATION": location_payload,
    "COMPARATIVES": flat("comparative"),
  }

  # (component, reason, code) for every recognised error marker
  known_errors = (
    ("MAGNITUDE", "no_magnitude", 9999911),
    ("MAGNITUDE", "more_magnitude", 9999912),
    ("MAGNITUDE", "format_error", 9999914),
    ("MAGNITUDE", "unknown_error", 9999913),
    ("EARTHQUAKE_EVENT", "no_earthquake_reference", 9999921),
    ("EARTHQUAKE_EVENT", "unknown_error", 9999922),
    ("DATES", "wrong_date_format", 9999931),
    ("DATES", "no_date", 9999932),
    ("DATES", "more_dates", 9999933),
    ("DATES", "unknown_error", 9999934),
    ("LOCATION", "no_country", 9999941),
    ("LOCATION", "more_city_or_country", 9999942),
    ("LOCATION", "more_country", 9999943),
    ("LOCATION", "unknown_error", 9999944),
    ("COMPARATIVES", "more_comparatives_mentions", 9999951),
    ("COMPARATIVES", "no_comparatives", 9999952),
    ("COMPARATIVES", "more_symbol_comparatives", 9999953),
    ("COMPARATIVES", "unknown_error", 9999954),
  )

  for position, entry in enumerate(final_dictionary):
    if not isinstance(entry, tuple):
      continue

    # first matching marker wins; a new payload is built for every replacement
    for component, reason, code in known_errors:
      if entry == (0, component, reason):
        final_dictionary[position] = payload_builders[component](code)
        break

  return final_dictionary



def natural_language_module(sentence):
  """
  Execute the complete natural language module pipeline on one sentence.

  The sentence is handed, in this order, to the earthquake-event, location,
  comparative, date and magnitude components. Every truthy component result
  is collected, error tuples are converted to error-code dictionaries by
  ``process_final_dict`` and all collected dictionaries are merged into one.

  Returns the merged dictionary, or a fixed error-message string when any
  step raises an ordinary exception. Interpreter-level signals such as
  ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
  """
  try:
    final_dictionary = []

    # identify whether the sentence is referred on earthquake events
    earth_event = identify_earthquake_event(sentence)
    if earth_event:
      final_dictionary.append(earth_event)

    # identify the target country and city in the sentence
    location = identify_locations(sentence)
    if location:
      final_dictionary.append(location)

    # identify the target comparative in the sentence
    comparative = comparatives_binding(sentence)
    if comparative:
      final_dictionary.append(comparative)

    # identify the target date in the sentence
    date = dates_binding(sentence)

    if isinstance(date, list):
      # a list result is read as [date dictionaries, matched date strings]
      date_dict = date[0]
      date_replc = date[1]

      if date_dict:
        final_dictionary.append(date_dict[0])

        # we also delete the date reference from the sentence so that there will
        # not be any confusion with it for the magnitude identification module
        if len(date_replc) == 1:
          sentence = sentence.replace(date_replc[0], " ")

    # in case it is a tuple we add it as it is and we do not substitute something in the sentence
    elif isinstance(date, tuple):
      final_dictionary.append(date)

    # identify the target magnitude number in the sentence
    # (runs on the sentence with the date reference removed, when that applied)
    magnitude = magnitude_binding(sentence)
    if magnitude:
      final_dictionary.append(magnitude)

    clean_final_dictionary = process_final_dict(final_dictionary)

    # merge the per-component dictionaries; later components win on key clashes
    result = {}
    for d in clean_final_dictionary:
      result.update(d)

    return result

  # `except Exception` rather than a bare `except`, so that Ctrl-C and
  # sys.exit() still terminate the program instead of being reported as an
  # "unexpected error"
  except Exception:
    return "\n\n=== AN UNEXPECTED ERROR HAS OCCURED. PLEASE EXECUTE AGAIN THE SCRIPT OR COMMUNICATE WITH THE DEVELOPER TEAM === \n\n"



def process_json_sf(nl_json, sentence):
  """
  Convert the captured information into the JSON structure sent to the SF.

  ``nl_json`` is expected to be the dictionary produced by
  ``natural_language_module`` and must provide the keys ``event``, ``city``,
  ``country``, ``date`` (with ``day``/``month``/``year``), ``Number`` and
  ``comparative``. Only the first city and the first country are used, and
  the date fields are converted with ``int``.

  Returns the formatted dictionary, or a fixed error-message string when the
  input cannot be converted (for example when ``nl_json`` is itself the error
  string returned by the pipeline, or a required key is missing).
  Interpreter-level signals such as ``KeyboardInterrupt`` and ``SystemExit``
  are not intercepted.
  """
  try:
    sf_json_format = {
      "text": sentence,
      "page": "1",
      "nlp": {
        "event": nl_json['event'],
        "city": nl_json['city'][0],
        "country": nl_json['country'][0],
        "year": int(nl_json['date']['year']),
        "month": int(nl_json['date']['month']),
        "day": int(nl_json['date']['day']),
        "magnitude": nl_json['Number'],
        "comparative": nl_json['comparative'],
        "point": False,
        "latitude": None,
        # NOTE(review): "lognitude" looks like a misspelling of "longitude";
        # it is kept as is because consumers of this output may rely on the key
        "lognitude": None,
      }
    }

    return sf_json_format

  # `except Exception` rather than a bare `except`, so that Ctrl-C and
  # sys.exit() are not swallowed and reported as an "unexpected error"
  except Exception:
    return "\n\n=== AN UNEXPECTED ERROR HAS OCCURED. PLEASE EXECUTE AGAIN THE SCRIPT OR COMMUNICATE WITH THE DEVELOPER TEAM === \n\n"



def main(sentence):
  """
  Entry point that runs the whole flow for a single sentence.

  The sentence goes through ``natural_language_module`` and the outcome is
  then formatted by ``process_json_sf``, whose return value is handed back
  to the caller unchanged.
  """
  return process_json_sf(natural_language_module(sentence), sentence)