nsthorat committed on
Commit
dc5a4ba
·
1 Parent(s): 83cc09a
.gitattributes DELETED
@@ -1,3 +0,0 @@
1
- dist/lilac-0.0.20-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
2
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
3
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: purple
5
  colorTo: purple
6
  sdk: docker
7
  app_port: 5432
8
- datasets: []
 
9
 
10
  ---
 
5
  colorTo: purple
6
  sdk: docker
7
  app_port: 5432
8
+ datasets:
9
+ - lilacai/nikhil_staging-local-enron-emails
10
 
11
  ---
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl DELETED
Binary file (10.8 kB)
 
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl DELETED
Binary file (21.8 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl DELETED
Binary file (331 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed7340614b1dea910ddeb26bbda0167b1f4fe2479071a62a70b63c18bc6232d0
3
- size 1672960
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED
Binary file (147 kB)
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2af2736f3d749391a431f9c24d3fc78cf8e58457cc4f0d1ce770185b92d879c
3
- size 1886446
 
 
 
 
data/.cache/lilac/concept/local/aliens/gte-small.pkl DELETED
Binary file (28.5 kB)
 
data/lilac.yml CHANGED
@@ -423,3 +423,542 @@ datasets:
423
  - - choices
424
  - '*'
425
  preferred_embedding: gte-small
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  - - choices
424
  - '*'
425
  preferred_embedding: gte-small
426
+ - namespace: local
427
+ name: deepset-prompt-inj
428
+ source:
429
+ dataset_name: deepset/prompt-injections
430
+ source_name: huggingface
431
+ embeddings:
432
+ - path: text
433
+ embedding: gte-small
434
+ settings:
435
+ ui:
436
+ media_paths:
437
+ - text
438
+ - namespace: local
439
+ name: jasper-prompt-inj
440
+ source:
441
+ dataset_name: JasperLS/prompt-injections
442
+ source_name: huggingface
443
+ embeddings:
444
+ - path: text
445
+ embedding: gte-small
446
+ settings:
447
+ ui:
448
+ media_paths:
449
+ - text
450
+ - namespace: local
451
+ name: mosaic-chat-v2
452
+ source:
453
+ dataset_name: sam-mosaic/chat-v2
454
+ source_name: huggingface
455
+ embeddings:
456
+ - path: prompt
457
+ embedding: gte-small
458
+ - path: response
459
+ embedding: gte-small
460
+ signals:
461
+ - path: prompt
462
+ signal:
463
+ signal_name: near_dup
464
+ - path: prompt
465
+ signal:
466
+ signal_name: pii
467
+ - path: prompt
468
+ signal:
469
+ signal_name: lang_detection
470
+ - path: prompt
471
+ signal:
472
+ embedding: gte-small
473
+ namespace: lilac
474
+ concept_name: non-english
475
+ signal_name: concept_score
476
+ - path: prompt
477
+ signal:
478
+ embedding: gte-small
479
+ namespace: lilac
480
+ concept_name: toxicity
481
+ signal_name: concept_score
482
+ - path: prompt
483
+ signal:
484
+ embedding: gte-small
485
+ namespace: lilac
486
+ concept_name: source-code
487
+ signal_name: concept_score
488
+ - path: prompt
489
+ signal:
490
+ embedding: gte-small
491
+ namespace: lilac
492
+ concept_name: negative-sentiment
493
+ signal_name: concept_score
494
+ - path: prompt
495
+ signal:
496
+ embedding: gte-small
497
+ namespace: lilac
498
+ concept_name: profanity
499
+ signal_name: concept_score
500
+ - path: prompt
501
+ signal:
502
+ signal_name: text_statistics
503
+ - path: response
504
+ signal:
505
+ signal_name: near_dup
506
+ - path: response
507
+ signal:
508
+ signal_name: pii
509
+ - path: response
510
+ signal:
511
+ signal_name: lang_detection
512
+ - path: response
513
+ signal:
514
+ embedding: gte-small
515
+ namespace: lilac
516
+ concept_name: non-english
517
+ signal_name: concept_score
518
+ - path: response
519
+ signal:
520
+ embedding: gte-small
521
+ namespace: lilac
522
+ concept_name: toxicity
523
+ signal_name: concept_score
524
+ - path: response
525
+ signal:
526
+ embedding: gte-small
527
+ namespace: lilac
528
+ concept_name: source-code
529
+ signal_name: concept_score
530
+ - path: response
531
+ signal:
532
+ embedding: gte-small
533
+ namespace: lilac
534
+ concept_name: negative-sentiment
535
+ signal_name: concept_score
536
+ - path: response
537
+ signal:
538
+ embedding: gte-small
539
+ namespace: lilac
540
+ concept_name: profanity
541
+ signal_name: concept_score
542
+ - path: response
543
+ signal:
544
+ signal_name: text_statistics
545
+ settings:
546
+ ui:
547
+ media_paths:
548
+ - prompt
549
+ - response
550
+ preferred_embedding: gte-small
551
+ - namespace: local
552
+ name: databricks-dolly-15k-curated-en
553
+ source:
554
+ dataset_name: argilla/databricks-dolly-15k-curated-en
555
+ source_name: huggingface
556
+ embeddings:
557
+ - path: original-context
558
+ embedding: gte-small
559
+ - path:
560
+ - new-context
561
+ - value
562
+ - '*'
563
+ embedding: gte-small
564
+ signals:
565
+ - path: original-instruction
566
+ signal:
567
+ signal_name: near_dup
568
+ - path: original-instruction
569
+ signal:
570
+ signal_name: pii
571
+ - path: original-instruction
572
+ signal:
573
+ signal_name: lang_detection
574
+ - path: original-instruction
575
+ signal:
576
+ signal_name: text_statistics
577
+ - path: original-context
578
+ signal:
579
+ signal_name: near_dup
580
+ - path: original-context
581
+ signal:
582
+ signal_name: pii
583
+ - path: original-context
584
+ signal:
585
+ signal_name: lang_detection
586
+ - path: original-context
587
+ signal:
588
+ embedding: gte-small
589
+ namespace: lilac
590
+ concept_name: positive-sentiment
591
+ signal_name: concept_score
592
+ - path: original-context
593
+ signal:
594
+ embedding: gte-small
595
+ namespace: lilac
596
+ concept_name: non-english
597
+ signal_name: concept_score
598
+ - path: original-context
599
+ signal:
600
+ embedding: gte-small
601
+ namespace: lilac
602
+ concept_name: toxicity
603
+ signal_name: concept_score
604
+ - path: original-context
605
+ signal:
606
+ embedding: gte-small
607
+ namespace: lilac
608
+ concept_name: question
609
+ signal_name: concept_score
610
+ - path: original-context
611
+ signal:
612
+ embedding: gte-small
613
+ namespace: lilac
614
+ concept_name: legal-termination
615
+ signal_name: concept_score
616
+ - path: original-context
617
+ signal:
618
+ embedding: gte-small
619
+ namespace: lilac
620
+ concept_name: source-code
621
+ signal_name: concept_score
622
+ - path: original-context
623
+ signal:
624
+ embedding: gte-small
625
+ namespace: lilac
626
+ concept_name: negative-sentiment
627
+ signal_name: concept_score
628
+ - path: original-context
629
+ signal:
630
+ embedding: gte-small
631
+ namespace: lilac
632
+ concept_name: profanity
633
+ signal_name: concept_score
634
+ - path: original-context
635
+ signal:
636
+ signal_name: text_statistics
637
+ - path: original-response
638
+ signal:
639
+ signal_name: near_dup
640
+ - path: original-response
641
+ signal:
642
+ signal_name: pii
643
+ - path: original-response
644
+ signal:
645
+ signal_name: lang_detection
646
+ - path: original-response
647
+ signal:
648
+ signal_name: text_statistics
649
+ - path:
650
+ - new-instruction
651
+ - value
652
+ - '*'
653
+ signal:
654
+ signal_name: near_dup
655
+ - path:
656
+ - new-instruction
657
+ - value
658
+ - '*'
659
+ signal:
660
+ signal_name: pii
661
+ - path:
662
+ - new-instruction
663
+ - value
664
+ - '*'
665
+ signal:
666
+ signal_name: lang_detection
667
+ - path:
668
+ - new-instruction
669
+ - value
670
+ - '*'
671
+ signal:
672
+ signal_name: text_statistics
673
+ - path:
674
+ - new-context
675
+ - value
676
+ - '*'
677
+ signal:
678
+ signal_name: near_dup
679
+ - path:
680
+ - new-context
681
+ - value
682
+ - '*'
683
+ signal:
684
+ signal_name: pii
685
+ - path:
686
+ - new-context
687
+ - value
688
+ - '*'
689
+ signal:
690
+ signal_name: lang_detection
691
+ - path:
692
+ - new-context
693
+ - value
694
+ - '*'
695
+ signal:
696
+ embedding: gte-small
697
+ namespace: lilac
698
+ concept_name: positive-sentiment
699
+ signal_name: concept_score
700
+ - path:
701
+ - new-context
702
+ - value
703
+ - '*'
704
+ signal:
705
+ embedding: gte-small
706
+ namespace: lilac
707
+ concept_name: non-english
708
+ signal_name: concept_score
709
+ - path:
710
+ - new-context
711
+ - value
712
+ - '*'
713
+ signal:
714
+ embedding: gte-small
715
+ namespace: lilac
716
+ concept_name: toxicity
717
+ signal_name: concept_score
718
+ - path:
719
+ - new-context
720
+ - value
721
+ - '*'
722
+ signal:
723
+ embedding: gte-small
724
+ namespace: lilac
725
+ concept_name: question
726
+ signal_name: concept_score
727
+ - path:
728
+ - new-context
729
+ - value
730
+ - '*'
731
+ signal:
732
+ embedding: gte-small
733
+ namespace: lilac
734
+ concept_name: legal-termination
735
+ signal_name: concept_score
736
+ - path:
737
+ - new-context
738
+ - value
739
+ - '*'
740
+ signal:
741
+ embedding: gte-small
742
+ namespace: lilac
743
+ concept_name: source-code
744
+ signal_name: concept_score
745
+ - path:
746
+ - new-context
747
+ - value
748
+ - '*'
749
+ signal:
750
+ embedding: gte-small
751
+ namespace: lilac
752
+ concept_name: negative-sentiment
753
+ signal_name: concept_score
754
+ - path:
755
+ - new-context
756
+ - value
757
+ - '*'
758
+ signal:
759
+ embedding: gte-small
760
+ namespace: lilac
761
+ concept_name: profanity
762
+ signal_name: concept_score
763
+ - path:
764
+ - new-context
765
+ - value
766
+ - '*'
767
+ signal:
768
+ signal_name: text_statistics
769
+ - path:
770
+ - new-response
771
+ - value
772
+ - '*'
773
+ signal:
774
+ signal_name: near_dup
775
+ - path:
776
+ - new-response
777
+ - value
778
+ - '*'
779
+ signal:
780
+ signal_name: pii
781
+ - path:
782
+ - new-response
783
+ - value
784
+ - '*'
785
+ signal:
786
+ signal_name: lang_detection
787
+ - path:
788
+ - new-response
789
+ - value
790
+ - '*'
791
+ signal:
792
+ signal_name: text_statistics
793
+ settings:
794
+ ui:
795
+ media_paths:
796
+ - original-instruction
797
+ - original-context
798
+ - original-response
799
+ - - new-instruction
800
+ - value
801
+ - '*'
802
+ - - new-context
803
+ - value
804
+ - '*'
805
+ - - new-response
806
+ - value
807
+ - '*'
808
+ preferred_embedding: gte-small
809
+ - namespace: local
810
+ name: open-asssistant-conversations
811
+ source:
812
+ dataset_name: OpenAssistant/oasst1
813
+ source_name: huggingface
814
+ embeddings:
815
+ - path: text
816
+ embedding: gte-small
817
+ signals:
818
+ - path: text
819
+ signal:
820
+ signal_name: near_dup
821
+ - path: text
822
+ signal:
823
+ signal_name: pii
824
+ - path: text
825
+ signal:
826
+ signal_name: lang_detection
827
+ - path: text
828
+ signal:
829
+ embedding: gte-small
830
+ namespace: lilac
831
+ concept_name: positive-sentiment
832
+ signal_name: concept_score
833
+ - path: text
834
+ signal:
835
+ embedding: gte-small
836
+ namespace: lilac
837
+ concept_name: non-english
838
+ signal_name: concept_score
839
+ - path: text
840
+ signal:
841
+ embedding: gte-small
842
+ namespace: lilac
843
+ concept_name: toxicity
844
+ signal_name: concept_score
845
+ - path: text
846
+ signal:
847
+ embedding: gte-small
848
+ namespace: lilac
849
+ concept_name: question
850
+ signal_name: concept_score
851
+ - path: text
852
+ signal:
853
+ embedding: gte-small
854
+ namespace: lilac
855
+ concept_name: legal-termination
856
+ signal_name: concept_score
857
+ - path: text
858
+ signal:
859
+ embedding: gte-small
860
+ namespace: lilac
861
+ concept_name: source-code
862
+ signal_name: concept_score
863
+ - path: text
864
+ signal:
865
+ embedding: gte-small
866
+ namespace: lilac
867
+ concept_name: negative-sentiment
868
+ signal_name: concept_score
869
+ - path: text
870
+ signal:
871
+ embedding: gte-small
872
+ namespace: lilac
873
+ concept_name: negative-sentiment
874
+ signal_name: concept_score
875
+ - path: text
876
+ signal:
877
+ embedding: gte-small
878
+ namespace: lilac
879
+ concept_name: profanity
880
+ signal_name: concept_score
881
+ - path: text
882
+ signal:
883
+ signal_name: text_statistics
884
+ settings:
885
+ ui:
886
+ media_paths:
887
+ - text
888
+ preferred_embedding: gte-small
889
+ - namespace: local
890
+ name: enron-emails
891
+ source:
892
+ dataset_name: EleutherAI/pile
893
+ config_name: enron_emails
894
+ sample_size: 100000
895
+ source_name: huggingface
896
+ embeddings:
897
+ - path: text
898
+ embedding: gte-small
899
+ signals:
900
+ - path: text
901
+ signal:
902
+ signal_name: near_dup
903
+ - path: text
904
+ signal:
905
+ signal_name: pii
906
+ - path: text
907
+ signal:
908
+ signal_name: lang_detection
909
+ - path: text
910
+ signal:
911
+ embedding: gte-small
912
+ namespace: lilac
913
+ concept_name: positive-sentiment
914
+ signal_name: concept_score
915
+ - path: text
916
+ signal:
917
+ embedding: gte-small
918
+ namespace: lilac
919
+ concept_name: non-english
920
+ signal_name: concept_score
921
+ - path: text
922
+ signal:
923
+ embedding: gte-small
924
+ namespace: lilac
925
+ concept_name: toxicity
926
+ signal_name: concept_score
927
+ - path: text
928
+ signal:
929
+ embedding: gte-small
930
+ namespace: lilac
931
+ concept_name: question
932
+ signal_name: concept_score
933
+ - path: text
934
+ signal:
935
+ embedding: gte-small
936
+ namespace: lilac
937
+ concept_name: legal-termination
938
+ signal_name: concept_score
939
+ - path: text
940
+ signal:
941
+ embedding: gte-small
942
+ namespace: lilac
943
+ concept_name: source-code
944
+ signal_name: concept_score
945
+ - path: text
946
+ signal:
947
+ embedding: gte-small
948
+ namespace: lilac
949
+ concept_name: negative-sentiment
950
+ signal_name: concept_score
951
+ - path: text
952
+ signal:
953
+ embedding: gte-small
954
+ namespace: lilac
955
+ concept_name: profanity
956
+ signal_name: concept_score
957
+ - path: text
958
+ signal:
959
+ signal_name: text_statistics
960
+ settings:
961
+ ui:
962
+ media_paths:
963
+ - text
964
+ preferred_embedding: gte-small
dist/README.md DELETED
@@ -1,2 +0,0 @@
1
- This directory is used for locally built whl files.
2
- We write a README.md to ensure an empty folder is uploaded when there is no whl.
 
 
 
dist/lilac-0.0.20-py3-none-any.whl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62911c0784fde670f2e8937545fa3e031e633b55e93a2361a9c7f603fe40a5e0
3
- size 1146198