Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1372,49 +1372,129 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
|
|
1372 |
if (strtobool(args.UseRetrieverForContextCreation)==True):
|
1373 |
labelTriples = ""
|
1374 |
passages = []
|
1375 |
-
nn=200
|
1376 |
-
|
1377 |
-
|
1378 |
-
|
1379 |
-
|
1380 |
-
|
1381 |
-
|
1382 |
-
|
1383 |
-
|
1384 |
-
#df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=50)
|
1385 |
-
#df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=50, min_threshold=0.65)
|
1386 |
-
#df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=50, min_threshold=0.65)
|
1387 |
-
df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=50, min_threshold=0.65)
|
1388 |
-
|
1389 |
-
passages = []
|
1390 |
-
|
1391 |
-
df_retrieved = df_retrievedtopk.copy()
|
1392 |
-
if not df_retrieved.empty:
|
1393 |
-
labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1394 |
-
labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
|
1395 |
-
|
1396 |
-
if not labelTriples:
|
1397 |
-
labelTriples =labelTriplesAPP
|
1398 |
-
else:
|
1399 |
-
labelTriples = labelTriples + ". " + labelTriplesAPP
|
1400 |
|
1401 |
-
|
1402 |
-
|
1403 |
|
1404 |
-
df_retrieved = df_retrievedtopk.copy()
|
1405 |
if not df_retrieved.empty:
|
1406 |
labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1407 |
-
labelTriplesAPP = ". ".join(
|
|
|
|
|
1408 |
if not labelTriples:
|
1409 |
labelTriples = labelTriplesAPP
|
1410 |
else:
|
1411 |
labelTriples = labelTriples + ". " + labelTriplesAPP
|
1412 |
|
1413 |
-
|
1414 |
-
labelTriples.strip().replace("..",".").strip()
|
1415 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1416 |
|
1417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1418 |
labelTriples = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST)
|
1419 |
|
1420 |
|
@@ -1483,18 +1563,18 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
|
|
1483 |
try:
|
1484 |
|
1485 |
contextText = ""
|
1486 |
-
|
1487 |
-
|
1488 |
-
|
1489 |
-
|
1490 |
-
|
1491 |
-
|
1492 |
-
|
1493 |
-
|
1494 |
-
|
1495 |
-
|
1496 |
-
|
1497 |
-
|
1498 |
|
1499 |
|
1500 |
|
@@ -1519,7 +1599,6 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
|
|
1519 |
return contextText, map_query_input_output
|
1520 |
|
1521 |
|
1522 |
-
|
1523 |
#@mem.cache
|
1524 |
def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=None, iALLURIScontextFromNCBO=None,UseBioportalForLinking=True,questionText=""):
|
1525 |
|
|
|
1372 |
if (strtobool(args.UseRetrieverForContextCreation)==True):
|
1373 |
labelTriples = ""
|
1374 |
passages = []
|
1375 |
+
nn = 200
|
1376 |
+
|
1377 |
+
if len(labelTriplesLIST)<=nn:
|
1378 |
+
passages = []
|
1379 |
+
for i, triple in enumerate(labelTriplesLIST, start=1):
|
1380 |
+
# for triple in labelTriplesLIST:
|
1381 |
+
TriplesString = (" ".join(str(element).capitalize() for element in triple))
|
1382 |
+
passages.append(TriplesString)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1383 |
|
1384 |
+
df_retrieved = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
|
1385 |
+
min_threshold=0.7)
|
1386 |
|
|
|
1387 |
if not df_retrieved.empty:
|
1388 |
labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1389 |
+
labelTriplesAPP = ". ".join(
|
1390 |
+
" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
|
1391 |
+
|
1392 |
if not labelTriples:
|
1393 |
labelTriples = labelTriplesAPP
|
1394 |
else:
|
1395 |
labelTriples = labelTriples + ". " + labelTriplesAPP
|
1396 |
|
1397 |
+
else:
|
|
|
1398 |
|
1399 |
+
OverallListRAGtriples = labelTriplesLIST.copy()
|
1400 |
+
|
1401 |
+
while len(OverallListRAGtriples)>nn:
|
1402 |
+
Oinnerlistiterative=[]
|
1403 |
+
for i, triple in enumerate(OverallListRAGtriples, start=1):
|
1404 |
+
# for triple in labelTriplesLIST:
|
1405 |
+
TriplesString = (" ".join(str(element).capitalize() for element in triple))
|
1406 |
+
passages.append(TriplesString)
|
1407 |
+
# Check if the current index is a multiple of nn
|
1408 |
+
if i % nn == 0:
|
1409 |
+
# print("elaborate RAG triples")
|
1410 |
+
|
1411 |
+
# df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=20)
|
1412 |
+
# df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=20, min_threshold=0.7)
|
1413 |
+
# df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=20, min_threshold=0.7)
|
1414 |
+
df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
|
1415 |
+
min_threshold=0.7)
|
1416 |
+
|
1417 |
+
passages = []
|
1418 |
+
|
1419 |
+
df_retrieved = df_retrievedtopk.copy()
|
1420 |
+
if not df_retrieved.empty:
|
1421 |
+
labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1422 |
+
if not Oinnerlistiterative:
|
1423 |
+
Oinnerlistiterative=labelTriplesLIST_RAGGED
|
1424 |
+
else:
|
1425 |
+
Oinnerlistiterative.extend(labelTriplesLIST_RAGGED)
|
1426 |
+
|
1427 |
+
if passages:
|
1428 |
+
df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
|
1429 |
+
min_threshold=0.7)
|
1430 |
+
|
1431 |
+
df_retrieved = df_retrievedtopk.copy()
|
1432 |
+
if not df_retrieved.empty:
|
1433 |
+
labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1434 |
+
if not Oinnerlistiterative:
|
1435 |
+
Oinnerlistiterative = labelTriplesLIST_RAGGED
|
1436 |
+
else:
|
1437 |
+
Oinnerlistiterative.extend(labelTriplesLIST_RAGGED)
|
1438 |
+
|
1439 |
+
OverallListRAGtriples = Oinnerlistiterative.copy()
|
1440 |
+
|
1441 |
+
if OverallListRAGtriples:
|
1442 |
+
labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in OverallListRAGtriples)
|
1443 |
|
1444 |
+
if not labelTriples:
|
1445 |
+
labelTriples = labelTriplesAPP
|
1446 |
+
else:
|
1447 |
+
labelTriples = labelTriples + ". " + labelTriplesAPP
|
1448 |
+
|
1449 |
+
labelTriples = labelTriples.strip().replace("..", ".").strip()
|
1450 |
+
|
1451 |
+
|
1452 |
+
|
1453 |
+
# labelTriples = ""
|
1454 |
+
# passages = []
|
1455 |
+
# nn=200
|
1456 |
+
# for i, triple in enumerate(labelTriplesLIST, start=1):
|
1457 |
+
# #for triple in labelTriplesLIST:
|
1458 |
+
# TriplesString = (" ".join(str(element).capitalize() for element in triple))
|
1459 |
+
# passages.append(TriplesString)
|
1460 |
+
# # Check if the current index is a multiple of nn
|
1461 |
+
# if i % nn == 0:
|
1462 |
+
# #print("elaborate RAG triples")
|
1463 |
+
#
|
1464 |
+
# #df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=20)
|
1465 |
+
# #df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=20, min_threshold=0.7)
|
1466 |
+
# #df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=20, min_threshold=0.7)
|
1467 |
+
# df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20, min_threshold=0.7)
|
1468 |
+
#
|
1469 |
+
# passages = []
|
1470 |
+
#
|
1471 |
+
# df_retrieved = df_retrievedtopk.copy()
|
1472 |
+
# if not df_retrieved.empty:
|
1473 |
+
# labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1474 |
+
# labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
|
1475 |
+
#
|
1476 |
+
# if not labelTriples:
|
1477 |
+
# labelTriples =labelTriplesAPP
|
1478 |
+
# else:
|
1479 |
+
# labelTriples = labelTriples + ". " + labelTriplesAPP
|
1480 |
+
#
|
1481 |
+
# if passages:
|
1482 |
+
# df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20, min_threshold=0.7)
|
1483 |
+
#
|
1484 |
+
# df_retrieved = df_retrievedtopk.copy()
|
1485 |
+
# if not df_retrieved.empty:
|
1486 |
+
# labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
|
1487 |
+
# labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
|
1488 |
+
# if not labelTriples:
|
1489 |
+
# labelTriples = labelTriplesAPP
|
1490 |
+
# else:
|
1491 |
+
# labelTriples = labelTriples + ". " + labelTriplesAPP
|
1492 |
+
#
|
1493 |
+
# if labelTriples:
|
1494 |
+
# labelTriples.strip().replace("..",".").strip()
|
1495 |
+
|
1496 |
+
|
1497 |
+
else: # NO RAG on triples
|
1498 |
labelTriples = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST)
|
1499 |
|
1500 |
|
|
|
1563 |
try:
|
1564 |
|
1565 |
contextText = ""
|
1566 |
+
if args.service_provider == "gptjrc":
|
1567 |
+
contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
|
1568 |
+
temperature=args.temperature, delimiter=myDelimiter,
|
1569 |
+
InContextExamples=[],
|
1570 |
+
handler=api_call_gptjrc,
|
1571 |
+
verbose=True, args=args)
|
1572 |
+
elif args.service_provider == "HFonPremises":
|
1573 |
+
contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
|
1574 |
+
temperature=args.temperature, delimiter=myDelimiter,
|
1575 |
+
InContextExamples=[],
|
1576 |
+
handler=api_call_HFonPremises,
|
1577 |
+
verbose=True, args=args)
|
1578 |
|
1579 |
|
1580 |
|
|
|
1599 |
return contextText, map_query_input_output
|
1600 |
|
1601 |
|
|
|
1602 |
#@mem.cache
|
1603 |
def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=None, iALLURIScontextFromNCBO=None,UseBioportalForLinking=True,questionText=""):
|
1604 |
|