Update app.py
app.py
CHANGED
@@ -20,11 +20,7 @@ from typing import Dict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from collections import Counter
 
-#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-#os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
-
 import torch
-#torch.cuda.empty_cache() # Clear cache ot torch
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Device: {device}...")
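Note on the removed lines: PYTORCH_CUDA_ALLOC_CONF only takes effect if it is exported before torch initializes CUDA, which is why these commented-out settings sat above the torch import. A minimal sketch of that pattern (the allocator value is an example, not part of this commit):

```python
import os

# Allocator tuning must be set before the first CUDA initialization,
# i.e. before importing torch (or at least before any CUDA call).
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"  # example value

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
```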
@@ -81,9 +77,9 @@ POSSIBLE_KGchoices_List = ["AEO", "BFO", "BIM", "BCGO", "CL", "CHIRO", "CHEBI",
     "GeoSPARQL", "HL7", "DOID", "HP", "HP_O", "IDO", "IAO", "ICD10", "LOINC", "MESH",
     "MONDO", "NCIT", "NCBITAXON", "NCBITaxon_", "NIFCELL", "NIFSTD", "GML", "OBCS", "OCHV", "OHPI",
     "OPB", "TRANS", "PLOSTHES", "RADLEX", "RO", "STY", "SO", "SNOMED", "STATO",
-    "SYMP", "FoodOn", "UBERON", "VO", "EuroSciVoc"]
+    "SYMP", "FoodOn", "UBERON", "VO", "OGMS", "EuroSciVoc"]
 
-ONLY_Ontologies_OnBIOPORTAL = ["AEO", "BCGO", "BFO", "BIM", "CHEBI", "CHIRO", "CL", "DCM", "DOID", "FMA", "FOODON", "GENO", "GML", "GO", "GEOSPARQL", "HL7", "HP", "HP_O", "IAO", "ICD10", "IDO", "LOINC", "MESH", "MONDO", "NCBITAXON", "NCIT", "NIFCELL", "NIFSTD", "OBCS", "OCHV", "OHPI", "OPB", "PLOSTHES", "RADLEX", "OBOREL", "SNOMEDCT", "SO", "STATO", "STY", "SYMP", "PTRANS", "UBERON", "VO"]
+ONLY_Ontologies_OnBIOPORTAL = ["AEO", "BCGO", "BFO", "BIM", "CHEBI", "CHIRO", "CL", "DCM", "DOID", "FMA", "FOODON", "GENO", "GML", "GO", "GEOSPARQL", "HL7", "HP", "HP_O", "IAO", "ICD10", "IDO", "LOINC", "MESH", "MONDO", "NCBITAXON", "NCIT", "NIFCELL", "NIFSTD", "OBCS", "OCHV", "OHPI", "OPB", "PLOSTHES", "RADLEX", "OBOREL", "SNOMEDCT", "SO", "STATO", "STY", "SYMP", "PTRANS", "UBERON", "VO", "OGMS"]
 
 encod = encoding_getter('microsoft/deberta-v3-large')
 text_splitter = TokenTextSplitter(
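The commit adds OGMS to both the user-facing KG choices and the list of ontologies resolvable on BioPortal. How app.py consumes the two lists is not shown in this diff; purely as illustration, one plausible use is filtering a user's selection down to BioPortal-linkable ontologies:

```python
# Illustrative only; names abbreviated from the lists above.
ONLY_Ontologies_OnBIOPORTAL = ["CHEBI", "SNOMEDCT", "OGMS"]

selected = ["CHEBI", "EuroSciVoc", "OGMS"]
linkable = [kg for kg in selected if kg.upper() in ONLY_Ontologies_OnBIOPORTAL]
print(linkable)  # ['CHEBI', 'OGMS']
```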
@@ -897,7 +893,8 @@ def entitiesFusion(df_annotated, args):
 
     #delete all the rows with score smaller than entities_filter_threshold:
     if args.entities_filter_threshold > 0:
-        df_annotated = df_annotated[df_annotated['score'] >= args.entities_filter_threshold]
+        # df_annotated = df_annotated[df_annotated['score'] >= args.entities_filter_threshold]
+        df_annotated = df_annotated[df_annotated['score'] > args.entities_filter_threshold]
         if df_annotated.empty:
             return df_annotated
 
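The functional change in this hunk is the comparison operator: entities scoring exactly at the threshold are now dropped as well. A toy illustration of the difference (data invented for the example):

```python
import pandas as pd

df = pd.DataFrame({"word": ["a", "b", "c"], "score": [0.5, 0.8, 0.9]})
threshold = 0.8

print(df[df["score"] >= threshold])  # old behaviour: keeps b and c
print(df[df["score"] > threshold])   # new behaviour: keeps only c
```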
@@ -1067,7 +1064,7 @@ def geonames_api_call(word, args, key_geonames, cache_map_geonames):
     return None, singleContext, globalContext, singleTriples, globalTriples, cache_map_geonames
 
 
-def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso, endpoint, VirtuosoUsername, contextWordVirtuoso, UseBioportalForLinking=False ):
+def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso, endpoint, VirtuosoUsername, contextWordVirtuoso, UseBioportalForLinking=False, questionText="" ):
     #UseBioportalForLinking = False #trial to del
 
     if strtobool(args.debug):
@@ -1177,7 +1174,7 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
     if strtobool(args.debug):
         print("Use Virtuoso Sparql endpoint for linking ... " + word.lower())
 
-    responseText = sparqlQuery(endpoint,
+    responseText = sparqlQuery(endpoint, questionText, VirtuosoUsername, key_virtuoso, strtobool(args.USE_CACHE))
 
     # Parse the response as JSON
     results = json.loads(responseText)
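The updated call passes the new questionText argument through to sparqlQuery, app.py's own helper, alongside the Virtuoso credentials and the cache flag. A hedged sketch of the call shape (assuming, as the surrounding lines suggest, that the helper returns a standard SPARQL JSON results string):

```python
import json

def link_word(endpoint, question_text, username, key, use_cache):
    # sparqlQuery is app.py's own helper, not a library function.
    response_text = sparqlQuery(endpoint, question_text, username, key, use_cache)
    results = json.loads(response_text)
    return results.get("results", {}).get("bindings", [])
```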
@@ -1360,6 +1357,7 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
 
 
 
+
 def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, args, map_query_input_output, cleanInput=True, questionText=""):
 
     # trial
@@ -1463,20 +1461,20 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
     # # Check if the current index is a multiple of nn
     # if i % nn == 0:
     #     #print("elaborate RAG triples")
-    #
+    #
     # #df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=20)
     # #df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=20, min_threshold=0.7)
     # #df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=20, min_threshold=0.7)
     # df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20, min_threshold=0.7)
-    #
+    #
     # passages = []
-    #
+    #
    # df_retrieved = df_retrievedtopk.copy()
     # if not df_retrieved.empty:
     #     #labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
     #     labelTriplesLIST_RAGGED = df_retrieved['Passage'].apply(lambda x: (x,)).tolist()
     #     labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
-    #
+    #
     # if not labelTriples:
     #     labelTriples =labelTriplesAPP
     # else:
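This block stays commented out, but it documents the intended RAG step: score the candidate triple passages against the sentence (questionText) and keep only the best fraction. The helpers' internals are not in this diff; a generic sketch of a top-fraction filter in the same spirit:

```python
def top_fraction_passages(scored, top_fraction=0.1, max_num_passages=20, min_threshold=0.7):
    # scored: list of (passage, score) pairs, higher score = more relevant.
    kept = sorted((p for p in scored if p[1] >= min_threshold),
                  key=lambda p: p[1], reverse=True)
    k = max(1, int(len(kept) * top_fraction))
    return kept[:min(k, max_num_passages)]

print(top_fraction_passages([("t1", 0.9), ("t2", 0.6), ("t3", 0.75)]))  # [('t1', 0.9)]
```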
@@ -1504,14 +1502,14 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
 
 
     if not(labelTriples) or labelTriples.strip=="":
-        logging.warning("No text or prompt supplied! Skypping it!")
+        logging.warning("getLinearTextualContextFromTriples - No text or prompt supplied! No relevant contextual triples retrieved...Skypping it! Word: "+str(word))
         return "", map_query_input_output
 
     if token_counter(labelTriples, args.model_name) > args.tokens_max: # THE CONTEXT IS TOO BIG, BIGGER THAN tokens_max, I need to split
         texts = text_splitter.create_documents([labelTriples])
         labelTriples = texts[0].page_content
     if not (labelTriples) or labelTriples.strip == "":
-        logging.warning("after splitting ...No text or prompt supplied! Skypping it!")
+        logging.warning("after splitting ...No text or prompt supplied! Skypping it! Word: "+str(word))
         return "", map_query_input_output
 
 
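When the concatenated triples exceed args.tokens_max, only the first chunk produced by the splitter is kept, which silently truncates the context. A standalone sketch of that truncation (chunk sizes are examples; app.py configures its own splitter near the top of the file):

```python
from langchain.text_splitter import TokenTextSplitter

splitter = TokenTextSplitter(chunk_size=50, chunk_overlap=0)  # example sizes
label_triples = "subject predicate object. " * 200
docs = splitter.create_documents([label_triples])
label_triples = docs[0].page_content  # keep only the first chunk, as app.py does
```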
@@ -1604,6 +1602,7 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
     return contextText, map_query_input_output
 
 
+
 #@mem.cache
 def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=None, iALLURIScontextFromNCBO=None,UseBioportalForLinking=True,questionText=""):
 
@@ -2036,102 +2035,111 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
 
 def process_row4Linking(row, text_splitter, args, key_geonames, cache_map_geonames, key_virtuoso, cache_map_virtuoso, load_map_query_input_output):
 
-    result = None
-    singleContext = None
-    globalContext = None
-    singleTriples = None
-    globalTriples = None
+    result = "" #None
+    singleContext = "" #None
+    globalContext = "" #None
+    singleTriples = "" #None
+    globalTriples = "" #None
     ALLURIScontext = []
 
-    if hasattr(args, 'useBioKgRAG') and (strtobool(args.useBioKgRAG)==True):
-        InRagMode = True
+    try:
 
+        if row.empty:
+            return result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_geonames, cache_map_virtuoso, load_map_query_input_output, row.name
 
-    elif row['IsBio'] == 1:
-
-        # Check if '@id' column exists in df_Extract
-        iiid = None
-        # Check if the '@id' exists in the Series
-        if '@id' in row:
-            # Check if the value is not None or NaN
-            if row['@id'] is not None and not pd.isna(row['@id']):
-                # Assign the value to the variable iiid
-                iiid = row['@id']
-        iiiALLURIScontextFromNCBO = None
-        if 'ALLURIScontextFromNCBO' in row:
-            if row['ALLURIScontextFromNCBO'] is not None and isinstance(row['ALLURIScontextFromNCBO'], list): #and not pd.isna(row['ALLURIScontextFromNCBO']):
-                iiiALLURIScontextFromNCBO=row['ALLURIScontextFromNCBO']
-                iiiALLURIScontextFromNCBO = list(set(iiiALLURIScontextFromNCBO))
+        InRagMode=False
+        if hasattr(args, 'useBioKgRAG') and (strtobool(args.useBioKgRAG)==True):
+            InRagMode = True
+
+        if (InRagMode==False):
+            if row['IsGeo'] == 1:
 
                 if strtobool(args.debug):
-                    print(f"\n-----
+                    print(f"\n----- IsGeo ... COMPUTING {row['word']} IN THE TEXT:")
                     print(row[args.source_column])
-        result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_virtuoso, load_map_query_input_output = virtuoso_api_call(row['word'], text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO, UseBioportalForLinking=True, questionText=row[args.source_column])
 
-    if row['model'] == "Forced":
-        # Check if '@id' column exists in df_Extract
-        iiid = None
-        # Check if the '@id' exists in the Series
-        if '@id' in row:
-            # Check if the value is not None or NaN
-            if row['@id'] is not None and not pd.isna(row['@id']):
-                # Assign the value to the variable iiid
-                iiid = row['@id']
-        iiiALLURIScontextFromNCBO = None
-        if 'ALLURIScontextFromNCBO' in row:
-            if row['ALLURIScontextFromNCBO'] is not None and isinstance(row['ALLURIScontextFromNCBO'],
-                                                                        list): # and not pd.isna(row['ALLURIScontextFromNCBO']):
-                iiiALLURIScontextFromNCBO = row['ALLURIScontextFromNCBO']
-                iiiALLURIScontextFromNCBO = list(set(iiiALLURIScontextFromNCBO))
-
-        print(f"\n----- isForced COMPUTING ... {row['word']} IN THE TEXT:")
-        print(row[args.source_column])
+                result, singleContext, globalContext, singleTriples, globalTriples, cache_map_geonames = geonames_api_call(row['word'], args, key_geonames, cache_map_geonames)
+
+            elif row['IsBio'] == 1:
+
+                # Check if '@id' column exists in df_Extract
+                iiid = None
+                # Check if the '@id' exists in the Series
+                if '@id' in row:
+                    # Check if the value is not None or NaN
+                    if row['@id'] is not None and not pd.isna(row['@id']):
+                        # Assign the value to the variable iiid
+                        iiid = row['@id']
+                iiiALLURIScontextFromNCBO = None
+                if 'ALLURIScontextFromNCBO' in row:
+                    if row['ALLURIScontextFromNCBO'] is not None and isinstance(row['ALLURIScontextFromNCBO'], list): #and not pd.isna(row['ALLURIScontextFromNCBO']):
+                        iiiALLURIScontextFromNCBO=row['ALLURIScontextFromNCBO']
+                        iiiALLURIScontextFromNCBO = list(set(iiiALLURIScontextFromNCBO))
+
+                if strtobool(args.debug):
+                    print(f"\n----- isBio COMPUTING ... {row['word']} IN THE TEXT:")
+                    print(row[args.source_column])
+                result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_virtuoso, load_map_query_input_output = virtuoso_api_call(row['word'], text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO, UseBioportalForLinking=True, questionText=row[args.source_column])
+
+            else:
+                if row['model'] == "Forced":
+                    # Check if '@id' column exists in df_Extract
+                    iiid = None
+                    # Check if the '@id' exists in the Series
+                    if '@id' in row:
+                        # Check if the value is not None or NaN
+                        if row['@id'] is not None and not pd.isna(row['@id']):
+                            # Assign the value to the variable iiid
+                            iiid = row['@id']
+                    iiiALLURIScontextFromNCBO = None
+                    if 'ALLURIScontextFromNCBO' in row:
+                        if row['ALLURIScontextFromNCBO'] is not None and isinstance(row['ALLURIScontextFromNCBO'],
+                                                                                    list): # and not pd.isna(row['ALLURIScontextFromNCBO']):
+                            iiiALLURIScontextFromNCBO = row['ALLURIScontextFromNCBO']
+                            iiiALLURIScontextFromNCBO = list(set(iiiALLURIScontextFromNCBO))
+
+                    if strtobool(args.debug):
+                        print(f"\n----- isForced COMPUTING ... {row['word']} IN THE TEXT:")
+                        print(row[args.source_column])
 
-    if not result: #try annotation without bioportal
                     result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_virtuoso, load_map_query_input_output = virtuoso_api_call(
                         row['word'], text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output,
-                        id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO,
+                        id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO,UseBioportalForLinking=True,questionText=row[args.source_column])
 
+                    if not result: #try annotation without bioportal
+                        result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_virtuoso, load_map_query_input_output = virtuoso_api_call(
+                            row['word'], text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output,
+                            id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO, UseBioportalForLinking=False,questionText=row[args.source_column])
+
+        else:
+            if (row['IsBio'] == 1) or ( (pd.isnull(row["IsBio"]) or row["IsBio"] == '' or row['IsBio'] == 0 or row["IsBio"] is None) and (row['entity_group'] == "MISC") ):
+
+                if strtobool(args.debug):
+                    print(f"\n----- InRagMode ...COMPUTING ... {row['word']} IN THE TEXT:")
+                    print(row[args.source_column])
+
+                # Check if '@id' column exists in df_Extract
+                iiid = None
+                # Check if the '@id' exists in the Series
+                if '@id' in row:
+                    # Check if the value is not None or NaN
+                    if row['@id'] is not None and not pd.isna(row['@id']):
+                        # Assign the value to the variable iiid
+                        iiid = row['@id']
+                iiiALLURIScontextFromNCBO = None
+                if 'ALLURIScontextFromNCBO' in row:
+                    if row['ALLURIScontextFromNCBO'] is not None and isinstance(row['ALLURIScontextFromNCBO'], list):
+                        iiiALLURIScontextFromNCBO = row['ALLURIScontextFromNCBO']
+                        iiiALLURIScontextFromNCBO = list(set(iiiALLURIScontextFromNCBO))
+
+                result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_virtuoso, load_map_query_input_output = virtuoso_api_call(
+                    row['word'], text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=iiid, iALLURIScontextFromNCBO=iiiALLURIScontextFromNCBO,UseBioportalForLinking=True,questionText=row[args.source_column])
+
+        return result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_geonames, cache_map_virtuoso, load_map_query_input_output, row.name
+
+    except Exception as e:
+        #print(f"Error occurred: {e}")
+        return result, ALLURIScontext, singleContext, globalContext, singleTriples, globalTriples, cache_map_geonames, cache_map_virtuoso, load_map_query_input_output, row.name
 
 
 def parallel_process_Row4Linking(df, text_splitter, args, key_geonames, cache_map_geonames, key_virtuoso, cache_map_virtuoso, load_map_query_input_output):
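Two things change in this function: the defaults become empty strings instead of None, and the whole body is wrapped in try/except so that every row, even a failing one, yields a tuple of the same arity (which the caller unpacks positionally). A stripped-down sketch of the pattern with a toy handler:

```python
import pandas as pd

def process_row_safe(row):
    # Defaults are empty strings, not None, so downstream string ops won't crash.
    result = singleContext = ""
    try:
        if row.empty:
            return result, singleContext, row.name
        result = str(row["word"]).upper()  # stand-in for the real linking work
        return result, singleContext, row.name
    except Exception:
        # Same-arity tuple on any error, so positional unpacking never breaks.
        return result, singleContext, row.name

df = pd.DataFrame({"word": ["fever", "cough"]})
out = df.apply(process_row_safe, axis=1)  # Series of 3-tuples
```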
@@ -2188,16 +2196,29 @@ def elinking(df_annotated_combined, text_splitter, args, key_geonames, cache_map
     else:
        # single processing
        result = df_annotated_combined.apply(lambda row: process_row4Linking(row, text_splitter, args, key_geonames, cache_map_geonames, key_virtuoso, cache_map_virtuoso, load_map_query_input_output), axis=1)
+
     #
-
-
-
-
-
-
-
-
-
+    try:
+        df_annotated_combined['namedEntity'] = result.str[0]
+        df_annotated_combined['ALLURIScontext'] = result.str[1]
+        df_annotated_combined['Context'] = result.str[2]
+        df_annotated_combined['ContextGlobal'] = result.str[3]
+        df_annotated_combined['Triples'] = result.str[4]
+        df_annotated_combined['TriplesGlobal'] = result.str[5]
+        cache_map_geonames_AFTER = result.str[6].iloc[-1]
+        cache_map_virtuoso_AFTER = result.str[7].iloc[-1]
+        load_map_query_input_output_AFTER = result.str[8].iloc[-1] #
+    except Exception as e:
+        # print(f"Error occurred: {e}")
+        df_annotated_combined['namedEntity'] = ""
+        df_annotated_combined['ALLURIScontext'] = ""
+        df_annotated_combined['Context'] = ""
+        df_annotated_combined['ContextGlobal'] = ""
+        df_annotated_combined['Triples'] = ""
+        df_annotated_combined['TriplesGlobal'] = ""
+        cache_map_geonames_AFTER = cache_map_geonames
+        cache_map_virtuoso_AFTER = cache_map_virtuoso
+        load_map_query_input_output_AFTER = load_map_query_input_output
 
 
     if args.num_cores_eLinking>1:
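Because apply() returns a Series of tuples, the new code uses pandas' .str positional indexing to split the tuple elements into columns, and .iloc[-1] to take the cache maps from the last processed row. The idiom in isolation:

```python
import pandas as pd

s = pd.Series([("entA", ["uri1"], {"cacheA": 1}),
               ("entB", ["uri2"], {"cacheB": 2})])
named = s.str[0]                 # first tuple element of every row
uris = s.str[1]                  # second element
last_cache = s.str[2].iloc[-1]   # element from the final row, like the *_AFTER maps
```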
@@ -2546,19 +2567,28 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
     # ##### this is to pass the links:
 
     # Create a new column for the entities with links
-    # #df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(lambda row: f"<a href='{row['namedEntity']}'>{row['word']}</a>", axis=1)
+    # #df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(lambda row: f"<a href='{row['namedEntity']}' target='_blank'>{row['word']}</a>", axis=1)
     # df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(
-    #     lambda row: f"<a href='{row['namedEntity']}'>{row['word']}</a>" if pd.notnull(row['namedEntity']) else row[
+    #     lambda row: f"<a href='{row['namedEntity']}' target='_blank'>{row['word']}</a>" if pd.notnull(row['namedEntity']) else row[
     #     'word'], axis=1)
     #include the expl-rel prefix:
     #df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(
-    #    lambda row: f"<a href='https://expl-rels-dev-vast.apps.ocpt.jrc.ec.europa.eu/?concept={row['namedEntity']}'>{row['word']}</a>" if pd.notnull(row['namedEntity']) else row[
+    #    lambda row: f"<a href='https://expl-rels-dev-vast.apps.ocpt.jrc.ec.europa.eu/?concept={row['namedEntity']}' target='_blank'>{row['word']}</a>" if pd.notnull(row['namedEntity']) else row[
     #    'word'], axis=1)
+    # df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(
+    #     lambda
+    #     row: f"<a href='https://api-vast.jrc.service.ec.europa.eu/describe//?url={row['namedEntity']}' target='_blank'>{row['word']}</a>" if pd.notnull(
+    #     row['namedEntity']) else row[
+    #     'word'], axis=1)
+
     df_annotated_combined['entity_with_link'] = df_annotated_combined.apply(
-        lambda
-        row['namedEntity']) else row[
+        lambda row: (
+            f"<a href='https://expl-rels-dev-vast.apps.ocpt.jrc.ec.europa.eu/?concept={row['namedEntity']}' target='_blank'>{row['word']}</a>"
+            if row['namedEntity'] not in [None, '', 'NaN', 'nan'] and pd.notnull(row['namedEntity']) else row[
+                'word']
+        ),
+        axis=1
+    )
 
     # Create a new dictionary with the entity information and the link
     dict_annotated_combined_NEL = df_annotated_combined[
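The final lambda now also treats the literal strings 'NaN'/'nan' (and empty strings) as missing before building the anchor tag, and every generated link opens in a new tab via target='_blank'. The same guard on a toy frame (URL prefix as in the commit):

```python
import pandas as pd

df = pd.DataFrame({"word": ["fever", "x"],
                   "namedEntity": ["http://purl.obolibrary.org/obo/HP_0001945", "nan"]})
prefix = "https://expl-rels-dev-vast.apps.ocpt.jrc.ec.europa.eu/?concept="

df["entity_with_link"] = df.apply(
    lambda row: f"<a href='{prefix}{row['namedEntity']}' target='_blank'>{row['word']}</a>"
    if row["namedEntity"] not in [None, "", "NaN", "nan"] and pd.notnull(row["namedEntity"])
    else row["word"],
    axis=1,
)
print(df["entity_with_link"].tolist())  # second row falls back to the bare word
```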