# Spaces: Sleeping  (appears to be a page-status banner captured along with the file; not part of the pipeline definition)
# Pipeline definition (matches the DVC `dvc.yaml` schema: stages -> cmd/deps/params).
# Each stage runs one script from src/benglasummarization/pipeline/ and lists the
# files (deps) and parameter keys (params) it declares a dependency on.
#
# NOTE(review): no stage declares `outs`. The artifacts/ paths listed under `deps`
# in later stages are presumably produced by earlier stages -- if so, declaring
# them as `outs` of the producing stage would make the stage ordering explicit.
# Confirm against the scripts before changing.
stages:
  # Stage 1: data ingestion.
  # NOTE(review): this script is named `stage01_...` while the others use
  # `stage_0N_...` -- confirm the file name on disk.
  data_ingestion:
    cmd: python src/benglasummarization/pipeline/stage01_data_ingestion.py
    deps:
      - src/benglasummarization/pipeline/stage01_data_ingestion.py
      - config/config.yaml

  # Stage 2: runs the stage_02_prepare_ben_tok.py script.
  prepare_Ban_tok:
    cmd: python src/benglasummarization/pipeline/stage_02_prepare_ben_tok.py
    deps:
      - src/benglasummarization/pipeline/stage_02_prepare_ben_tok.py
      - config/config.yaml
    params:
      - output_file

  # Stage 3: runs the stage_03_train_ban_token.py script; additionally depends on
  # the combined text file under artifacts/ban_tokenization/.
  tokenize_training:
    cmd: python src/benglasummarization/pipeline/stage_03_train_ban_token.py
    deps:
      - src/benglasummarization/pipeline/stage_03_train_ban_token.py
      - config/config.yaml
      - artifacts/ban_tokenization/combined_text.txt
    params:  # list format: one parameter key per item
      - model_prefix
      - model_type
      - vocab_size

  # Stage 4: runs the stage_04_model_Training.py script; additionally depends on
  # the ingested CSV and the tokenizer model file.
  training:
    cmd: python src/benglasummarization/pipeline/stage_04_model_Training.py
    deps:
      - src/benglasummarization/pipeline/stage_04_model_Training.py
      - config/config.yaml
      - artifacts/data_ingestion/BanSum.csv
      - artifacts/train_tokenization/cbengali_tokenizer.model
    params:  # list format: one parameter key per item
      - max_input_length
      - max_output_length
      - model_name
      - batch_size
      - num_epochs
      - learning_rate
      - accumulator_steps
      - max_grad_norm
      - early_stopping_patience
      - patience_counter