jattokatarratto committed on
Commit
444f117
·
verified ·
1 Parent(s): e18819a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -45
app.py CHANGED
@@ -1372,49 +1372,129 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
1372
  if (strtobool(args.UseRetrieverForContextCreation)==True):
1373
  labelTriples = ""
1374
  passages = []
1375
- nn=200
1376
- for i, triple in enumerate(labelTriplesLIST, start=1):
1377
- #for triple in labelTriplesLIST:
1378
- TriplesString = (" ".join(str(element).capitalize() for element in triple))
1379
- passages.append(TriplesString)
1380
- # Check if the current index is a multiple of nn
1381
- if i % nn == 0:
1382
- #print("elaborate RAG triples")
1383
-
1384
- #df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=50)
1385
- #df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=50, min_threshold=0.65)
1386
- #df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=50, min_threshold=0.65)
1387
- df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=50, min_threshold=0.65)
1388
-
1389
- passages = []
1390
-
1391
- df_retrieved = df_retrievedtopk.copy()
1392
- if not df_retrieved.empty:
1393
- labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1394
- labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
1395
-
1396
- if not labelTriples:
1397
- labelTriples =labelTriplesAPP
1398
- else:
1399
- labelTriples = labelTriples + ". " + labelTriplesAPP
1400
 
1401
- if passages:
1402
- df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=50, min_threshold=0.65)
1403
 
1404
- df_retrieved = df_retrievedtopk.copy()
1405
  if not df_retrieved.empty:
1406
  labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1407
- labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
 
 
1408
  if not labelTriples:
1409
  labelTriples = labelTriplesAPP
1410
  else:
1411
  labelTriples = labelTriples + ". " + labelTriplesAPP
1412
 
1413
- if labelTriples:
1414
- labelTriples.strip().replace("..",".").strip()
1415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1416
 
1417
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1418
  labelTriples = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST)
1419
 
1420
 
@@ -1483,18 +1563,18 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
1483
  try:
1484
 
1485
  contextText = ""
1486
- # if args.service_provider == "gptjrc":
1487
- # contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
1488
- # temperature=args.temperature, delimiter=myDelimiter,
1489
- # InContextExamples=[],
1490
- # handler=api_call_gptjrc,
1491
- # verbose=True, args=args)
1492
- # elif args.service_provider == "HFonPremises":
1493
- # contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
1494
- # temperature=args.temperature, delimiter=myDelimiter,
1495
- # InContextExamples=[],
1496
- # handler=api_call_HFonPremises,
1497
- # verbose=True, args=args)
1498
 
1499
 
1500
 
@@ -1519,7 +1599,6 @@ def getLinearTextualContextFromTriples(word,labelTriplesLIST, text_splitter, arg
1519
  return contextText, map_query_input_output
1520
 
1521
 
1522
-
1523
  #@mem.cache
1524
  def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=None, iALLURIScontextFromNCBO=None,UseBioportalForLinking=True,questionText=""):
1525
 
 
1372
  if (strtobool(args.UseRetrieverForContextCreation)==True):
1373
  labelTriples = ""
1374
  passages = []
1375
+ nn = 200
1376
+
1377
+ if len(labelTriplesLIST)<=nn:
1378
+ passages = []
1379
+ for i, triple in enumerate(labelTriplesLIST, start=1):
1380
+ # for triple in labelTriplesLIST:
1381
+ TriplesString = (" ".join(str(element).capitalize() for element in triple))
1382
+ passages.append(TriplesString)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1383
 
1384
+ df_retrieved = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
1385
+ min_threshold=0.7)
1386
 
 
1387
  if not df_retrieved.empty:
1388
  labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1389
+ labelTriplesAPP = ". ".join(
1390
+ " ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
1391
+
1392
  if not labelTriples:
1393
  labelTriples = labelTriplesAPP
1394
  else:
1395
  labelTriples = labelTriples + ". " + labelTriplesAPP
1396
 
1397
+ else:
 
1398
 
1399
+ OverallListRAGtriples = labelTriplesLIST.copy()
1400
+
1401
+ while len(OverallListRAGtriples)>nn:
1402
+ Oinnerlistiterative=[]
1403
+ for i, triple in enumerate(OverallListRAGtriples, start=1):
1404
+ # for triple in labelTriplesLIST:
1405
+ TriplesString = (" ".join(str(element).capitalize() for element in triple))
1406
+ passages.append(TriplesString)
1407
+ # Check if the current index is a multiple of nn
1408
+ if i % nn == 0:
1409
+ # print("elaborate RAG triples")
1410
+
1411
+ # df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=20)
1412
+ # df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=20, min_threshold=0.7)
1413
+ # df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=20, min_threshold=0.7)
1414
+ df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
1415
+ min_threshold=0.7)
1416
+
1417
+ passages = []
1418
+
1419
+ df_retrieved = df_retrievedtopk.copy()
1420
+ if not df_retrieved.empty:
1421
+ labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1422
+ if not Oinnerlistiterative:
1423
+ Oinnerlistiterative=labelTriplesLIST_RAGGED
1424
+ else:
1425
+ Oinnerlistiterative.extend(labelTriplesLIST_RAGGED)
1426
+
1427
+ if passages:
1428
+ df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20,
1429
+ min_threshold=0.7)
1430
+
1431
+ df_retrieved = df_retrievedtopk.copy()
1432
+ if not df_retrieved.empty:
1433
+ labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1434
+ if not Oinnerlistiterative:
1435
+ Oinnerlistiterative = labelTriplesLIST_RAGGED
1436
+ else:
1437
+ Oinnerlistiterative.extend(labelTriplesLIST_RAGGED)
1438
+
1439
+ OverallListRAGtriples = Oinnerlistiterative.copy()
1440
+
1441
+ if OverallListRAGtriples:
1442
+ labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in OverallListRAGtriples)
1443
 
1444
+ if not labelTriples:
1445
+ labelTriples = labelTriplesAPP
1446
+ else:
1447
+ labelTriples = labelTriples + ". " + labelTriplesAPP
1448
+
1449
+ labelTriples = labelTriples.strip().replace("..", ".").strip()
1450
+
1451
+
1452
+
1453
+ # labelTriples = ""
1454
+ # passages = []
1455
+ # nn=200
1456
+ # for i, triple in enumerate(labelTriplesLIST, start=1):
1457
+ # #for triple in labelTriplesLIST:
1458
+ # TriplesString = (" ".join(str(element).capitalize() for element in triple))
1459
+ # passages.append(TriplesString)
1460
+ # # Check if the current index is a multiple of nn
1461
+ # if i % nn == 0:
1462
+ # #print("elaborate RAG triples")
1463
+ #
1464
+ # #df_retrieved_Base = RAG_retrieval_Base(questionText, passages, min_threshold=0.7, max_num_passages=20)
1465
+ # #df_retrievedZscore = RAG_retrieval_Z_scores(questionText, passages, z_threshold=1.0, max_num_passages=20, min_threshold=0.7)
1466
+ # #df_retrievedPercentile = RAG_retrieval_Percentile(questionText, passages, percentile=90, max_num_passages=20, min_threshold=0.7)
1467
+ # df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20, min_threshold=0.7)
1468
+ #
1469
+ # passages = []
1470
+ #
1471
+ # df_retrieved = df_retrievedtopk.copy()
1472
+ # if not df_retrieved.empty:
1473
+ # labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1474
+ # labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
1475
+ #
1476
+ # if not labelTriples:
1477
+ # labelTriples =labelTriplesAPP
1478
+ # else:
1479
+ # labelTriples = labelTriples + ". " + labelTriplesAPP
1480
+ #
1481
+ # if passages:
1482
+ # df_retrievedtopk = RAG_retrieval_TopK(questionText, passages, top_fraction=0.1, max_num_passages=20, min_threshold=0.7)
1483
+ #
1484
+ # df_retrieved = df_retrievedtopk.copy()
1485
+ # if not df_retrieved.empty:
1486
+ # labelTriplesLIST_RAGGED = df_retrieved.to_records(index=False).tolist()
1487
+ # labelTriplesAPP = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST_RAGGED)
1488
+ # if not labelTriples:
1489
+ # labelTriples = labelTriplesAPP
1490
+ # else:
1491
+ # labelTriples = labelTriples + ". " + labelTriplesAPP
1492
+ #
1493
+ # if labelTriples:
1494
+ # labelTriples.strip().replace("..",".").strip()
1495
+
1496
+
1497
+ else: # NO RAG on triples
1498
  labelTriples = ". ".join(" ".join(str(element).capitalize() for element in triple) for triple in labelTriplesLIST)
1499
 
1500
 
 
1563
  try:
1564
 
1565
  contextText = ""
1566
+ if args.service_provider == "gptjrc":
1567
+ contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
1568
+ temperature=args.temperature, delimiter=myDelimiter,
1569
+ InContextExamples=[],
1570
+ handler=api_call_gptjrc,
1571
+ verbose=True, args=args)
1572
+ elif args.service_provider == "HFonPremises":
1573
+ contextText = call_model(input_text=labelTriples, prompt=myPromt, model=args.model_name,
1574
+ temperature=args.temperature, delimiter=myDelimiter,
1575
+ InContextExamples=[],
1576
+ handler=api_call_HFonPremises,
1577
+ verbose=True, args=args)
1578
 
1579
 
1580
 
 
1599
  return contextText, map_query_input_output
1600
 
1601
 
 
1602
  #@mem.cache
1603
  def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuoso, load_map_query_input_output, id=None, iALLURIScontextFromNCBO=None,UseBioportalForLinking=True,questionText=""):
1604