nsthorat committed on
Commit
3bd5462
·
1 Parent(s): 26c379b
.gitattributes DELETED
@@ -1,3 +0,0 @@
1
- dist/lilac-0.0.17-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
2
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
3
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl DELETED
Binary file (23.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl DELETED
Binary file (21.8 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl DELETED
Binary file (331 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed7340614b1dea910ddeb26bbda0167b1f4fe2479071a62a70b63c18bc6232d0
3
- size 1672960
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED
Binary file (126 kB)
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2af2736f3d749391a431f9c24d3fc78cf8e58457cc4f0d1ce770185b92d879c
3
- size 1886446
 
 
 
 
data/.cache/lilac/concept/local/aliens/gte-small.pkl DELETED
Binary file (28.5 kB)
 
data/lilac.yml ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lilac project config.
2
+ # See https://lilacml.com/api_reference/index.html#lilac.Config for details.
3
+
4
+ datasets:
5
+ - namespace: local
6
+ name: glue
7
+ source:
8
+ dataset_name: glue
9
+ config_name: ax
10
+ source_name: huggingface
11
+ embeddings:
12
+ - path: premise
13
+ embedding: gte-small
14
+ signals:
15
+ - path: premise
16
+ signal:
17
+ signal_name: pii
18
+ - path: hypothesis
19
+ signal:
20
+ signal_name: pii
21
+ settings:
22
+ ui:
23
+ media_paths:
24
+ - premise
25
+ - namespace: local
26
+ name: glue_ax
27
+ source:
28
+ dataset_name: glue
29
+ config_name: ax
30
+ source_name: huggingface
31
+ embeddings:
32
+ - path: hypothesis
33
+ embedding: gte-small
34
+ settings:
35
+ ui:
36
+ media_paths:
37
+ - hypothesis
38
+ - namespace: local
39
+ name: imdb3
40
+ source:
41
+ dataset_name: imdb
42
+ source_name: huggingface
43
+ settings:
44
+ ui:
45
+ media_paths:
46
+ - text
47
+ - namespace: local
48
+ name: imdb
49
+ source:
50
+ dataset_name: imdb
51
+ source_name: huggingface
52
+ embeddings:
53
+ - path: text
54
+ embedding: gte-small
55
+ settings:
56
+ ui:
57
+ media_paths:
58
+ - text
59
+ - namespace: local
60
+ name: imdb2
61
+ source:
62
+ dataset_name: imdb
63
+ source_name: huggingface
64
+ settings:
65
+ ui:
66
+ media_paths:
67
+ - text
68
+ - namespace: lilac
69
+ name: OpenOrca-100k
70
+ source:
71
+ dataset_name: Open-Orca/OpenOrca
72
+ sample_size: 100000
73
+ source_name: huggingface
74
+ embeddings:
75
+ - path: question
76
+ embedding: gte-small
77
+ - path: response
78
+ embedding: gte-small
79
+ signals:
80
+ - path: question
81
+ signal:
82
+ signal_name: near_dup
83
+ - path: question
84
+ signal:
85
+ signal_name: pii
86
+ - path: question
87
+ signal:
88
+ signal_name: lang_detection
89
+ - path: question
90
+ signal:
91
+ embedding: gte-small
92
+ namespace: lilac
93
+ concept_name: positive-sentiment
94
+ signal_name: concept_score
95
+ - path: question
96
+ signal:
97
+ embedding: gte-small
98
+ namespace: lilac
99
+ concept_name: non-english
100
+ signal_name: concept_score
101
+ - path: question
102
+ signal:
103
+ embedding: gte-small
104
+ namespace: lilac
105
+ concept_name: toxicity
106
+ signal_name: concept_score
107
+ - path: question
108
+ signal:
109
+ embedding: gte-small
110
+ namespace: lilac
111
+ concept_name: question
112
+ signal_name: concept_score
113
+ - path: question
114
+ signal:
115
+ embedding: gte-small
116
+ namespace: lilac
117
+ concept_name: legal-termination
118
+ signal_name: concept_score
119
+ - path: question
120
+ signal:
121
+ embedding: gte-small
122
+ namespace: lilac
123
+ concept_name: source-code
124
+ signal_name: concept_score
125
+ - path: question
126
+ signal:
127
+ embedding: gte-small
128
+ namespace: lilac
129
+ concept_name: negative-sentiment
130
+ signal_name: concept_score
131
+ - path: question
132
+ signal:
133
+ embedding: gte-small
134
+ namespace: lilac
135
+ concept_name: profanity
136
+ signal_name: concept_score
137
+ - path: question
138
+ signal:
139
+ signal_name: text_statistics
140
+ - path: response
141
+ signal:
142
+ signal_name: near_dup
143
+ - path: response
144
+ signal:
145
+ signal_name: pii
146
+ - path: response
147
+ signal:
148
+ signal_name: lang_detection
149
+ - path: response
150
+ signal:
151
+ embedding: gte-small
152
+ namespace: lilac
153
+ concept_name: positive-sentiment
154
+ signal_name: concept_score
155
+ - path: response
156
+ signal:
157
+ embedding: gte-small
158
+ namespace: lilac
159
+ concept_name: non-english
160
+ signal_name: concept_score
161
+ - path: response
162
+ signal:
163
+ embedding: gte-small
164
+ namespace: lilac
165
+ concept_name: toxicity
166
+ signal_name: concept_score
167
+ - path: response
168
+ signal:
169
+ embedding: gte-small
170
+ namespace: lilac
171
+ concept_name: question
172
+ signal_name: concept_score
173
+ - path: response
174
+ signal:
175
+ embedding: gte-small
176
+ namespace: lilac
177
+ concept_name: legal-termination
178
+ signal_name: concept_score
179
+ - path: response
180
+ signal:
181
+ embedding: gte-small
182
+ namespace: lilac
183
+ concept_name: source-code
184
+ signal_name: concept_score
185
+ - path: response
186
+ signal:
187
+ embedding: gte-small
188
+ namespace: lilac
189
+ concept_name: negative-sentiment
190
+ signal_name: concept_score
191
+ - path: response
192
+ signal:
193
+ embedding: gte-small
194
+ namespace: lilac
195
+ concept_name: profanity
196
+ signal_name: concept_score
197
+ - path: response
198
+ signal:
199
+ signal_name: text_statistics
200
+ settings:
201
+ ui:
202
+ media_paths:
203
+ - question
204
+ - response
205
+ preferred_embedding: gte-small
206
+ - namespace: local
207
+ name: the_movies_dataset
208
+ source:
209
+ filepaths:
210
+ - gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
211
+ names: []
212
+ source_name: csv
213
+ settings:
214
+ ui:
215
+ media_paths:
216
+ - overview
217
+ - namespace: local
218
+ name: glue_ax_parquet
219
+ source:
220
+ filepaths:
221
+ - gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
222
+ source_name: parquet
223
+ settings:
224
+ ui:
225
+ media_paths:
226
+ - premise
dist/README.md DELETED
@@ -1,2 +0,0 @@
1
- This directory is used for locally built whl files.
2
- We write a README.md to ensure an empty folder is uploaded when there is no whl.
 
 
 
dist/lilac-0.0.17-py3-none-any.whl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0af64bad5f5bfea7c8bddef1804297b3e518c82742c1edb18f50155ad1e4f69c
3
- size 1132299
 
 
 
 
docker_start.py CHANGED
@@ -9,6 +9,7 @@ from huggingface_hub import scan_cache_dir, snapshot_download
9
 
10
  from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
11
  from lilac.env import data_path, env
 
12
  from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
13
 
14
 
@@ -70,8 +71,12 @@ def main() -> None:
70
  ignore_patterns=['.gitattributes', 'README.md'])
71
 
72
  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
73
- # Copy datasets.
74
  spaces_data_dir = os.path.join(snapshot_dir, 'data')
 
 
 
 
75
 
76
  # Delete cache files from persistent storage.
77
  cache_dir = get_lilac_cache_dir(data_path())
 
9
 
10
  from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
11
  from lilac.env import data_path, env
12
+ from lilac.project import PROJECT_CONFIG_FILENAME
13
  from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
14
 
15
 
 
71
  ignore_patterns=['.gitattributes', 'README.md'])
72
 
73
  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
74
+
75
  spaces_data_dir = os.path.join(snapshot_dir, 'data')
76
+ # Copy the config file.
77
+ project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
78
+ if os.path.exists(project_config_file):
79
+ shutil.copy(project_config_file, os.path.join(data_path(), PROJECT_CONFIG_FILENAME))
80
 
81
  # Delete cache files from persistent storage.
82
  cache_dir = get_lilac_cache_dir(data_path())