Spaces:
Sleeping
Sleeping
Seungwoo hong
committed on
Commit
·
d457583
1
Parent(s):
739c80b
🚑 fix: Add missing project files for tutorial 1 in Dockerfile
Browse files
- .gitattributes +1 -0
- Dockerfile +3 -0
- projects/tutorial_1/0/config.yaml +37 -0
- projects/tutorial_1/0/post_retrieve_node_line/generator/0.parquet +3 -0
- projects/tutorial_1/0/post_retrieve_node_line/generator/best_0.parquet +3 -0
- projects/tutorial_1/0/post_retrieve_node_line/generator/summary.csv +2 -0
- projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/0.parquet +3 -0
- projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/best_0.parquet +3 -0
- projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/summary.csv +2 -0
- projects/tutorial_1/0/post_retrieve_node_line/summary.csv +3 -0
- projects/tutorial_1/0/retrieve_node_line/retrieval/0.parquet +3 -0
- projects/tutorial_1/0/retrieve_node_line/retrieval/1.parquet +3 -0
- projects/tutorial_1/0/retrieve_node_line/retrieval/2.parquet +3 -0
- projects/tutorial_1/0/retrieve_node_line/retrieval/best_2.parquet +3 -0
- projects/tutorial_1/0/retrieve_node_line/retrieval/summary.csv +4 -0
- projects/tutorial_1/0/retrieve_node_line/summary.csv +2 -0
- projects/tutorial_1/0/summary.csv +4 -0
- projects/tutorial_1/config.yaml +37 -0
- projects/tutorial_1/corpus.parquet +3 -0
- projects/tutorial_1/data/corpus.parquet +3 -0
- projects/tutorial_1/data/qa.parquet +3 -0
- projects/tutorial_1/qa_test.parquet +3 -0
- projects/tutorial_1/qa_train.parquet +3 -0
- projects/tutorial_1/resources/bm25_porter_stemmer.pkl +3 -0
- projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/data_level0.bin +3 -0
- projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/header.bin +3 -0
- projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/index_metadata.pickle +3 -0
- projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/length.bin +3 -0
- projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/link_lists.bin +3 -0
- projects/tutorial_1/trial.json +6 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
CHANGED
@@ -16,5 +16,8 @@ COPY --chown=user ./autorag/projects/tutorial_1/0 /app/projects/tutorial_1/0
|
|
16 |
|
17 |
COPY --chown=user . /app
|
18 |
|
|
|
|
|
|
|
19 |
# AutoRAG 실행 명령어를 CMD로 변경
|
20 |
CMD ["autorag", "run_web", "--trial_path", "./projects/tutorial_1/0"]
|
|
|
16 |
|
17 |
COPY --chown=user . /app
|
18 |
|
19 |
+
# contains files larger than 10 MiB.
|
20 |
+
COPY --chown=user ./projects/tutorial_1/0 /app/projects/tutorial_1/0
|
21 |
+
|
22 |
# AutoRAG 실행 명령어를 CMD로 변경
|
23 |
CMD ["autorag", "run_web", "--trial_path", "./projects/tutorial_1/0"]
|
projects/tutorial_1/0/config.yaml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
node_lines:
|
3 |
+
- node_line_name: retrieve_node_line
|
4 |
+
nodes:
|
5 |
+
- node_type: retrieval
|
6 |
+
strategy:
|
7 |
+
metrics: [retrieval_f1, retrieval_recall, retrieval_ndcg, retrieval_mrr]
|
8 |
+
top_k: 3
|
9 |
+
modules:
|
10 |
+
- module_type: vectordb
|
11 |
+
embedding_model: openai
|
12 |
+
- module_type: bm25
|
13 |
+
- module_type: hybrid_rrf
|
14 |
+
weight_range: (4,80)
|
15 |
+
- node_line_name: post_retrieve_node_line
|
16 |
+
nodes:
|
17 |
+
- node_type: prompt_maker
|
18 |
+
strategy:
|
19 |
+
metrics:
|
20 |
+
- metric_name: meteor
|
21 |
+
- metric_name: rouge
|
22 |
+
- metric_name: sem_score
|
23 |
+
embedding_model: openai
|
24 |
+
modules:
|
25 |
+
- module_type: fstring
|
26 |
+
prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : "
|
27 |
+
- node_type: generator
|
28 |
+
strategy:
|
29 |
+
metrics:
|
30 |
+
- metric_name: meteor
|
31 |
+
- metric_name: rouge
|
32 |
+
- metric_name: sem_score
|
33 |
+
embedding_model: openai
|
34 |
+
modules:
|
35 |
+
- module_type: openai_llm
|
36 |
+
llm: gpt-4o-mini
|
37 |
+
batch: 16 # If you have low tier at OpenAI, decrease this.
|
projects/tutorial_1/0/post_retrieve_node_line/generator/0.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2bb7582072fb85e1d7d58812664028a8aa7696a6bb9a9e3c4f922be8a73c204
|
3 |
+
size 1250306
|
projects/tutorial_1/0/post_retrieve_node_line/generator/best_0.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75fe9675ba1a745e74829af6d68cafa27863e3fd1f74083eb3e0a00fe103486d
|
3 |
+
size 9195921
|
projects/tutorial_1/0/post_retrieve_node_line/generator/summary.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
filename,module_name,module_params,execution_time,average_output_token,meteor,rouge,sem_score,is_best
|
2 |
+
0.parquet,OpenAILLM,"{'llm': 'gpt-4o-mini', 'batch': 16}",0.4728041404485703,213.3475,0.20115987187873874,0.12026338139765363,0.8632004986231319,True
|
projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/0.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21e98811cabeac17de30f37d6ccd8ae079a41cfbe247025e7876469db237be01
|
3 |
+
size 4311321
|
projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/best_0.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd2a0ceaa2711469f2845e7ba3c18bc0c19ac57df35af276097cc2d8aa61e170
|
3 |
+
size 7946233
|
projects/tutorial_1/0/post_retrieve_node_line/prompt_maker/summary.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
filename,module_name,module_params,execution_time,average_prompt_token,is_best
|
2 |
+
0.parquet,Fstring,{'prompt': 'Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : '},9.610652923583985e-06,3747.7675,True
|
projects/tutorial_1/0/post_retrieve_node_line/summary.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
2 |
+
prompt_maker,0.parquet,Fstring,{'prompt': 'Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : '},9.610652923583983e-06
|
3 |
+
generator,0.parquet,OpenAILLM,"{'llm': 'gpt-4o-mini', 'batch': 16}",0.4728041404485703
|
projects/tutorial_1/0/retrieve_node_line/retrieval/0.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48cddd9ec7153a2bfadfae22ca34dc5defc1dc4a23dcdb60a1d861ce4935233e
|
3 |
+
size 3264206
|
projects/tutorial_1/0/retrieve_node_line/retrieval/1.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3faf617d528d7434603c6840d689790b3abbc6e7f7032edb45370ce60d08a1e8
|
3 |
+
size 3551541
|
projects/tutorial_1/0/retrieve_node_line/retrieval/2.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1502be25ddf4614531ca9f3e2ca39866e0b9c485f5fc696273920e6957a4b0f5
|
3 |
+
size 3404296
|
projects/tutorial_1/0/retrieve_node_line/retrieval/best_2.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:921fc2dc2fbe1a94402c8ab9d85033e6ce0c693154ca0cef93e18e524050e090
|
3 |
+
size 3635528
|
projects/tutorial_1/0/retrieve_node_line/retrieval/summary.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
filename,module_name,module_params,execution_time,retrieval_f1,retrieval_recall,retrieval_ndcg,retrieval_mrr,is_best
|
2 |
+
0.parquet,VectorDB,"{'top_k': 3, 'embedding_model': 'openai'}",0.015339269042015076,0.4325,0.865,0.8356831095982161,0.8254166666666666,False
|
3 |
+
1.parquet,BM25,{'top_k': 3},0.003347758650779724,0.475,0.95,0.9104743802857166,0.8966666666666666,False
|
4 |
+
2.parquet,HybridRRF,"{'top_k': 3, 'weight': 4.0, 'target_modules': ('VectorDB', 'BM25'), 'target_module_params': ({'top_k': 3, 'embedding_model': 'openai'}, {'top_k': 3})}",0.0186870276927948,0.47875,0.9575,0.9138069216607175,0.8983333333333333,True
|
projects/tutorial_1/0/retrieve_node_line/summary.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
2 |
+
retrieval,2.parquet,HybridRRF,"{'top_k': 3, 'weight': 4.0, 'target_modules': ('VectorDB', 'BM25'), 'target_module_params': ({'top_k': 3, 'embedding_model': 'openai'}, {'top_k': 3})}",0.0186870276927948
|
projects/tutorial_1/0/summary.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
node_line_name,node_type,best_module_filename,best_module_name,best_module_params,best_execution_time
|
2 |
+
retrieve_node_line,retrieval,2.parquet,HybridRRF,"{'top_k': 3, 'weight': 4.0, 'target_modules': ('VectorDB', 'BM25'), 'target_module_params': ({'top_k': 3, 'embedding_model': 'openai'}, {'top_k': 3})}",0.0186870276927948
|
3 |
+
post_retrieve_node_line,prompt_maker,0.parquet,Fstring,{'prompt': 'Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : '},9.610652923583983e-06
|
4 |
+
post_retrieve_node_line,generator,0.parquet,OpenAILLM,"{'llm': 'gpt-4o-mini', 'batch': 16}",0.4728041404485703
|
projects/tutorial_1/config.yaml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
node_lines:
|
3 |
+
- node_line_name: retrieve_node_line
|
4 |
+
nodes:
|
5 |
+
- node_type: retrieval
|
6 |
+
strategy:
|
7 |
+
metrics: [retrieval_f1, retrieval_recall, retrieval_ndcg, retrieval_mrr]
|
8 |
+
top_k: 3
|
9 |
+
modules:
|
10 |
+
- module_type: vectordb
|
11 |
+
embedding_model: openai
|
12 |
+
- module_type: bm25
|
13 |
+
- module_type: hybrid_rrf
|
14 |
+
weight_range: (4,80)
|
15 |
+
- node_line_name: post_retrieve_node_line
|
16 |
+
nodes:
|
17 |
+
- node_type: prompt_maker
|
18 |
+
strategy:
|
19 |
+
metrics:
|
20 |
+
- metric_name: meteor
|
21 |
+
- metric_name: rouge
|
22 |
+
- metric_name: sem_score
|
23 |
+
embedding_model: openai
|
24 |
+
modules:
|
25 |
+
- module_type: fstring
|
26 |
+
prompt: "Read the passages and answer the given question. \n Question: {query} \n Passage: {retrieved_contents} \n Answer : "
|
27 |
+
- node_type: generator
|
28 |
+
strategy:
|
29 |
+
metrics:
|
30 |
+
- metric_name: meteor
|
31 |
+
- metric_name: rouge
|
32 |
+
- metric_name: sem_score
|
33 |
+
embedding_model: openai
|
34 |
+
modules:
|
35 |
+
- module_type: openai_llm
|
36 |
+
llm: gpt-4o-mini
|
37 |
+
batch: 16 # If you have low tier at OpenAI, decrease this.
|
projects/tutorial_1/corpus.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a17ad7cb152b2bfc5a4bbdf08a1020a1dfc0be9f86c9b0dc0159d9155d374a2
|
3 |
+
size 7322737
|
projects/tutorial_1/data/corpus.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a246e68ba6ea2d7778b4d584b8a7b1e31b874bb2f2f34b81239eb2db8e883139
|
3 |
+
size 7322818
|
projects/tutorial_1/data/qa.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55a997e2f54d06fbe591f7d7f7c5b433f8e8257bc6384dd5d916cf4861d34fa7
|
3 |
+
size 209368
|
projects/tutorial_1/qa_test.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ac2855ed349521c2826070a3f637f161925f8d94154f6b0d3e9267046c7aa20
|
3 |
+
size 209810
|
projects/tutorial_1/qa_train.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e22589613d59039a81cbe0130fd2122227eea9380332a9a9edd3e45fed7a3f02
|
3 |
+
size 324781
|
projects/tutorial_1/resources/bm25_porter_stemmer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:087324dfd19322e18c53f4a929c844086591e51a53027112e53a3788c5bcc3ba
|
3 |
+
size 6326431
|
projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9ddc1f331d40532357ddeeecefeca163233900ba792a426fa61578db6ff0007
|
3 |
+
size 12568000
|
projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72bc9139e872f9a18e4cf4554254facfebad88439df6da1a4cd9d63ae7593c91
|
3 |
+
size 100
|
projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:570167be2c20d252fb0878b2bef0f38b5a9440b2a441b881212d2586b1f2e949
|
3 |
+
size 113967
|
projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d063399d9595d052b5908a857e2ce05095e0011114a3b5edac7f4f3d6742c13f
|
3 |
+
size 8000
|
projects/tutorial_1/resources/chroma/f56771fb-90d3-4c0c-ab5b-952946d77f87/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3c89af5a90ecdad9f2d5bbe34a838cf69c791a700c444694f5419eb58499c5a
|
3 |
+
size 16976
|
projects/tutorial_1/trial.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"trial_name": "0",
|
4 |
+
"start_time": "2024-09-30 01:43:30"
|
5 |
+
}
|
6 |
+
]
|