music2emo-youtube-link-ja

Running

App Files Community

kjysmu committed 16 days ago

Commit

51f9444

verified ·

1 Parent(s): 4c53a91

Upload 17 files

Browse files

Files changed (18) hide show

.gitattributes +1 -0
inference/.DS_Store +0 -0
inference/data/base_config.yaml +106 -0
inference/data/btc_model.pt +3 -0
inference/data/btc_model_large_voca.pt +3 -0
inference/data/chord.json +1 -0
inference/data/chord_attr.json +1 -0
inference/data/chord_attr_inv.json +16 -0
inference/data/chord_inv.json +1 -0
inference/data/chord_root.json +1 -0
inference/data/chord_root_inv.json +15 -0
inference/data/prep_config.yaml +51 -0
inference/data/run_config.yaml +43 -0
inference/data/tag_list.npy +3 -0
inference/data/test_config.yaml +41 -0
inference/data/train_config.yaml +94 -0
inference/input/test.mp3 +3 -0
inference/temp_out/.DS_Store +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+inference/input/test.mp3 filter=lfs diff=lfs merge=lfs -text

inference/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

inference/data/base_config.yaml ADDED Viewed

	@@ -0,0 +1,106 @@

+version: "1.34"
+lr: 1e-4
+log_step: 1
+split: 0
+batch_size: 8
+sr: 16000
+datasets:
+  - jamendo
+  - emomusic
+  - pmemo
+  - deam
+model:
+  encoder: "MERT"
+  layers:
+    - 5
+    - 6
+  classifier: "linear-mt-attn-ck"
+  # - linear
+  # - linear-attn-ck
+  # - linear-mt-attn-ck
+  kd: True
+  kd_weight: 0.8
+  kd_temperature: 1
+  lr: 1e-4
+# audio_path: './dataset/jamendo'
+# subset: 'moodtheme'
+dataset:
+  jamendo:
+    root: './dataset/jamendo'
+    subset: 'moodtheme'
+    batch_size: 8
+    output_size : 56
+    split: 0
+    segment_type: "all" # [all,f10s,f30s,10s,30s]
+    num_workers: 4
+  deam:
+    root: './dataset/deam'
+    batch_size: 8
+    output_size : 2
+    segment_type: "all" # [all,f10s,f30s,10s,30s]
+    num_workers: 4
+  pmemo:
+    root: './dataset/pmemo'
+    batch_size: 8
+    output_size : 2
+    segment_type: "all" # [all,f10s,f30s,10s,30s]
+    num_workers: 4
+  emomusic:
+    root: './dataset/emomusic'
+    batch_size: 8
+    output_size : 2
+    segment_type: "all" # [all,f10s,f30s,10s,30s]
+    num_workers: 4
+# --------------------------------------- #
+genre_class_size: 87
+mood_class_size: 56
+instr_class_size: 40
+dac_latents_size: 72
+dac_rvq_size: 9
+# --------------------------------------- #
+#PMEMO BEST (0.5360 0.7772), mt: (0.5401 0.7780)
+checkpoint_pmemo: "tb_logs/best/P.ckpt"
+#DEAM BEST (0.5131 0.6025), mt: (0.5150 0.6125)
+checkpoint_deam: "tb_logs/best/D.ckpt"
+#EMOMUSIC BEST (0.5957 0.7489), mt: (0.6091 0.7525)
+checkpoint_emomusic: "tb_logs/best/E.ckpt"
+#JAMENDO BEST (0.1521 0.7806)
+checkpoint_jamendo: "tb_logs/best/J.ckpt"
+# datasets:
+#   - jamendo
+#   - pmemo
+#   - deam
+#   - emomusic
+#   - pmemo
+#   - jamendo
+# datasets_val:
+#   - emomusic
+# model_save_path: './saved_models/'
+# results_save_path: './results/'
+# hydra:
+#   job:
+#     chdir: True
+# - MERT M2L LIBROSA Encodec DAC
+# aggr_method: "mean"
+# - mean
+# - median
+# - 80th_percentile
+# - max

inference/data/btc_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71c2c5db17e8c43b8a9a9da5db36ef2d667158c07a214eba16344c154c00bf54
+size 12154754

inference/data/btc_model_large_voca.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1673d23f8f9a55ae7f9e8b80a51da616debb22675b8d8b67ea6ce0ef37b0ab51
+size 12229576

inference/data/chord.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"N": 0, "C": 1, "C:dim": 2, "C:sus4": 3, "C:min7": 4, "C:min": 5, "C:sus2": 6, "C:aug": 7, "C:dim7": 8, "C:maj6": 9, "C:hdim7": 10, "C:7": 11, "C:min6": 12, "C:maj7": 13, "C#": 14, "C#:dim": 15, "C#:sus4": 16, "C#:min7": 17, "C#:min": 18, "C#:sus2": 19, "C#:aug": 20, "C#:dim7": 21, "C#:maj6": 22, "C#:hdim7": 23, "C#:7": 24, "C#:min6": 25, "C#:maj7": 26, "D": 27, "D:dim": 28, "D:sus4": 29, "D:min7": 30, "D:min": 31, "D:sus2": 32, "D:aug": 33, "D:dim7": 34, "D:maj6": 35, "D:hdim7": 36, "D:7": 37, "D:min6": 38, "D:maj7": 39, "D#": 40, "D#:dim": 41, "D#:sus4": 42, "D#:min7": 43, "D#:min": 44, "D#:sus2": 45, "D#:aug": 46, "D#:dim7": 47, "D#:maj6": 48, "D#:hdim7": 49, "D#:7": 50, "D#:min6": 51, "D#:maj7": 52, "E": 53, "E:dim": 54, "E:sus4": 55, "E:min7": 56, "E:min": 57, "E:sus2": 58, "E:aug": 59, "E:dim7": 60, "E:maj6": 61, "E:hdim7": 62, "E:7": 63, "E:min6": 64, "E:maj7": 65, "F": 66, "F:dim": 67, "F:sus4": 68, "F:min7": 69, "F:min": 70, "F:sus2": 71, "F:aug": 72, "F:dim7": 73, "F:maj6": 74, "F:hdim7": 75, "F:7": 76, "F:min6": 77, "F:maj7": 78, "F#": 79, "F#:dim": 80, "F#:sus4": 81, "F#:min7": 82, "F#:min": 83, "F#:sus2": 84, "F#:aug": 85, "F#:dim7": 86, "F#:maj6": 87, "F#:hdim7": 88, "F#:7": 89, "F#:min6": 90, "F#:maj7": 91, "G": 92, "G:dim": 93, "G:sus4": 94, "G:min7": 95, "G:min": 96, "G:sus2": 97, "G:aug": 98, "G:dim7": 99, "G:maj6": 100, "G:hdim7": 101, "G:7": 102, "G:min6": 103, "G:maj7": 104, "G#": 105, "G#:dim": 106, "G#:sus4": 107, "G#:min7": 108, "G#:min": 109, "G#:sus2": 110, "G#:aug": 111, "G#:dim7": 112, "G#:maj6": 113, "G#:hdim7": 114, "G#:7": 115, "G#:min6": 116, "G#:maj7": 117, "A": 118, "A:dim": 119, "A:sus4": 120, "A:min7": 121, "A:min": 122, "A:sus2": 123, "A:aug": 124, "A:dim7": 125, "A:maj6": 126, "A:hdim7": 127, "A:7": 128, "A:min6": 129, "A:maj7": 130, "A#": 131, "A#:dim": 132, "A#:sus4": 133, "A#:min7": 134, "A#:min": 135, "A#:sus2": 136, "A#:aug": 137, "A#:dim7": 138, "A#:maj6": 139, "A#:hdim7": 140, "A#:7": 141, "A#:min6": 142, "A#:maj7": 
143, "B": 144, "B:dim": 145, "B:sus4": 146, "B:min7": 147, "B:min": 148, "B:sus2": 149, "B:aug": 150, "B:dim7": 151, "B:maj6": 152, "B:hdim7": 153, "B:7": 154, "B:min6": 155, "B:maj7": 156, "X": 157 }

inference/data/chord_attr.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"N": 0, "maj": 1, "dim": 2, "sus4": 3, "min7": 4, "min": 5, "sus2": 6, "aug": 7, "dim7": 8, "maj6": 9, "hdim7": 10, "7": 11, "min6": 12, "maj7": 13}

inference/data/chord_attr_inv.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "0": "N",
+    "1": "maj",
+    "2": "dim",
+    "3": "sus4",
+    "4": "min7",
+    "5": "min",
+    "6": "sus2",
+    "7": "aug",
+    "8": "dim7",
+    "9": "maj6",
+    "10": "hdim7",
+    "11": "7",
+    "12": "min6",
+    "13": "maj7"
+}

inference/data/chord_inv.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"0": "N", "1": "C", "2": "C:dim", "3": "C:sus4", "4": "C:min7", "5": "C:min", "6": "C:sus2", "7": "C:aug", "8": "C:dim7", "9": "C:maj6", "10": "C:hdim7", "11": "C:7", "12": "C:min6", "13": "C:maj7", "14": "C#", "15": "C#:dim", "16": "C#:sus4", "17": "C#:min7", "18": "C#:min", "19": "C#:sus2", "20": "C#:aug", "21": "C#:dim7", "22": "C#:maj6", "23": "C#:hdim7", "24": "C#:7", "25": "C#:min6", "26": "C#:maj7", "27": "D", "28": "D:dim", "29": "D:sus4", "30": "D:min7", "31": "D:min", "32": "D:sus2", "33": "D:aug", "34": "D:dim7", "35": "D:maj6", "36": "D:hdim7", "37": "D:7", "38": "D:min6", "39": "D:maj7", "40": "D#", "41": "D#:dim", "42": "D#:sus4", "43": "D#:min7", "44": "D#:min", "45": "D#:sus2", "46": "D#:aug", "47": "D#:dim7", "48": "D#:maj6", "49": "D#:hdim7", "50": "D#:7", "51": "D#:min6", "52": "D#:maj7", "53": "E", "54": "E:dim", "55": "E:sus4", "56": "E:min7", "57": "E:min", "58": "E:sus2", "59": "E:aug", "60": "E:dim7", "61": "E:maj6", "62": "E:hdim7", "63": "E:7", "64": "E:min6", "65": "E:maj7", "66": "F", "67": "F:dim", "68": "F:sus4", "69": "F:min7", "70": "F:min", "71": "F:sus2", "72": "F:aug", "73": "F:dim7", "74": "F:maj6", "75": "F:hdim7", "76": "F:7", "77": "F:min6", "78": "F:maj7", "79": "F#", "80": "F#:dim", "81": "F#:sus4", "82": "F#:min7", "83": "F#:min", "84": "F#:sus2", "85": "F#:aug", "86": "F#:dim7", "87": "F#:maj6", "88": "F#:hdim7", "89": "F#:7", "90": "F#:min6", "91": "F#:maj7", "92": "G", "93": "G:dim", "94": "G:sus4", "95": "G:min7", "96": "G:min", "97": "G:sus2", "98": "G:aug", "99": "G:dim7", "100": "G:maj6", "101": "G:hdim7", "102": "G:7", "103": "G:min6", "104": "G:maj7", "105": "G#", "106": "G#:dim", "107": "G#:sus4", "108": "G#:min7", "109": "G#:min", "110": "G#:sus2", "111": "G#:aug", "112": "G#:dim7", "113": "G#:maj6", "114": "G#:hdim7", "115": "G#:7", "116": "G#:min6", "117": "G#:maj7", "118": "A", "119": "A:dim", "120": "A:sus4", "121": "A:min7", "122": "A:min", "123": "A:sus2", "124": "A:aug", "125": "A:dim7", "126": 
"A:maj6", "127": "A:hdim7", "128": "A:7", "129": "A:min6", "130": "A:maj7", "131": "A#", "132": "A#:dim", "133": "A#:sus4", "134": "A#:min7", "135": "A#:min", "136": "A#:sus2", "137": "A#:aug", "138": "A#:dim7", "139": "A#:maj6", "140": "A#:hdim7", "141": "A#:7", "142": "A#:min6", "143": "A#:maj7", "144": "B", "145": "B:dim", "146": "B:sus4", "147": "B:min7", "148": "B:min", "149": "B:sus2", "150": "B:aug", "151": "B:dim7", "152": "B:maj6", "153": "B:hdim7", "154": "B:7", "155": "B:min6", "156": "B:maj7", "157": "X"}

inference/data/chord_root.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"N": 0, "C": 1, "C#": 2, "D": 3, "D#": 4, "E": 5, "F": 6, "F#": 7, "G": 8, "G#": 9, "A": 10, "A#": 11, "B": 12}

inference/data/chord_root_inv.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "0": "N",
+    "1": "C",
+    "2": "C#",
+    "3": "D",
+    "4": "D#",
+    "5": "E",
+    "6": "F",
+    "7": "F#",
+    "8": "G",
+    "9": "G#",
+    "10": "A",
+    "11": "A#",
+    "12": "B"
+}

inference/data/prep_config.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+device_id: 3
+is_split: True
+segment_duration: 30
+# --- DATASET --- #
+# dataset:
+#   input_dir: '../dataset/jamendo/mp3'
+#   output_dir: '../dataset/jamendo/mert_30s'
+    # audio length : Full
+# dataset:
+#   input_dir: '../dataset/dmdd/mp3'
+#   output_dir: '../dataset/dmdd/mert_30s'
+#   # audio length : ~30s
+# dataset:
+#   input_dir: '../dataset/emomusic/mp3'
+#   output_dir: '../dataset/emomusic/mert_30s'
+#   # audio length : ~30s
+# dataset:
+#   input_dir: '../dataset/pmemo/mp3'
+#   output_dir: '../dataset/pmemo/mert_30s'
+#   # audio length : ~30s
+dataset:
+  input_dir: '../dataset/deam/mp3'
+  output_dir: '../dataset/deam/mert_30s'
+  # audio length : ~30s
+# --- ENCODER --- #
+model:
+  name: 'm-a-p/MERT-v1-95M'
+  sr: 24000
+# model:
+#   name: 'music2latent'
+#   sr: 44100

inference/data/run_config.yaml ADDED Viewed

	@@ -0,0 +1,43 @@

+mp3:
+  song_hz: 22050
+  inst_len: 10.0
+  skip_interval: 5.0
+feature:
+  n_bins: 144
+  bins_per_octave: 24
+  hop_length: 2048
+  #large_voca: False
+  large_voca: True
+experiment:
+  learning_rate : 0.0001
+  weight_decay : 0.0
+  max_epoch : 100
+  batch_size : 128
+  save_step : 40
+  data_ratio : 0.8
+model:
+  feature_size : 144
+  timestep : 108
+  #num_chords : 25
+  num_chords : 170
+  input_dropout : 0.2
+  layer_dropout : 0.2
+  attention_dropout : 0.2
+  relu_dropout : 0.2
+  num_layers : 8
+  num_heads : 4
+  hidden_size : 128
+  total_key_depth : 128
+  total_value_depth : 128
+  filter_size : 128
+  loss : 'ce'
+  probs_out : False
+path:
+  ckpt_path : 'model'
+  result_path : 'result'
+  asset_path : '/data/music/chord_recognition/jayg996/assets'
+  root_path : '/data/music/chord_recognition'

inference/data/tag_list.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9510e22fca2ac817c8af9287f1fa40dbbbc10c489ead8d7bfc99191c0569d60d
+size 22820

inference/data/test_config.yaml ADDED Viewed

	@@ -0,0 +1,41 @@

+defaults:
+  - base_config
+  - _self_
+batch_size: 8
+devices: [0]
+trainer:
+  devices: ${devices}
+  accelerator: 'gpu'
+# datasets:
+#   - jamendo
+#   - dmdd
+checkpoint_latest: True
+multitask: True
+dataset_type: "va"
+#'mood' or 'va'
+## If not True, then use following checkpoint.
+checkpoint: "tb_logs/best/EJ.ckpt"
+# checkpoint_J: "tb_logs/best/jamendo.ckpt"
+# checkpoint_P: "tb_logs/best/pmemo.ckpt"
+# checkpoint_E: "tb_logs/best/emomusic.ckpt"
+# checkpoint_D: "tb_logs/best/deam.ckpt"
+# checkpoint_PJ: "tb_logs/best/PJ.ckpt"
+# checkpoint_EJ: "tb_logs/best/EJ.ckpt"
+# checkpoint_DJ: "tb_logs/best/DJ.ckpt"
+# checkpoint_JP: "tb_logs/best/JP.ckpt"
+# checkpoint_JE: "tb_logs/best/JE.ckpt"
+# checkpoint_JD: "tb_logs/best/JD.ckpt"
+# checkpoint_ALL: "tb_logs/best/ALL.ckpt"
+# checkpoint: "tb_logs/train_audio_classification/version_110/checkpoints/21-0.1202.ckpt"

inference/data/train_config.yaml ADDED Viewed

	@@ -0,0 +1,94 @@

+defaults:
+  - base_config
+  - _self_
+devices: [0,1,2,3]
+epochs: 500
+batch_size: 8
+monitor_metric: "val_loss"
+monitor_metric_mood: "val_loss_mood"
+monitor_metric_va: "val_loss_va"
+checkpoint:
+  monitor: "${monitor_metric}"
+  filename: "{epoch:02d}-{${monitor_metric}:.4f}"
+  save_top_k: -1
+  mode: "min"
+  auto_insert_metric_name: False
+  save_last: True
+checkpoint_mood:
+  monitor: "${monitor_metric_mood}"
+  filename: "mood-{epoch:02d}-{${monitor_metric_mood}:.4f}"
+  save_top_k: -1
+  mode: "min"
+  auto_insert_metric_name: False
+  save_last: True
+checkpoint_va:
+  monitor: "${monitor_metric_va}"
+  filename: "va-{epoch:02d}-{${monitor_metric_va}:.4f}"
+  save_top_k: 5
+  mode: "min"
+  auto_insert_metric_name: False
+  save_last: True
+earlystopping:
+  monitor: "${monitor_metric_mood}"
+  patience: 10
+  min_delta: 0.0001
+  mode: "min"
+trainer:
+  devices: ${devices}
+  max_epochs: ${epochs}
+  accelerator: 'gpu'
+# strategy: 'ddp_find_unused_parameters_true'
+# optimizer:
+#   _target_: torch.optim.AdamW
+#   _partial_: true
+#   lr: 1e-4
+#   weight_decay: 0.01
+# scheduler:
+#   _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
+#   _partial_: true
+#   cooldown: 5
+#   mode: max
+#   factor: 0.2
+#   patience: 10
+#   min_lr: 1.6e-7
+# monitor_metric: "val_loss"
+# # val_loss
+# # val_loss_mood
+# # val_loss_va
+# checkpoint:
+#   monitor: "${monitor_metric}"
+#   filename: "{epoch:02d}-{${monitor_metric}:.4f}"
+#   save_top_k: 2
+#   mode: "min"
+#   auto_insert_metric_name: False
+#   save_last: True
+# checkpoint:
+#   monitor: "val_loss_mood"
+#   filename: "{epoch:02d}-{val_loss_mood:.4f}"
+#   save_top_k: 2
+#   mode: "min"
+#   auto_insert_metric_name: False
+#   save_last: True
+# earlystopping:
+#   monitor: 'val_loss_mood'
+#   patience: 10
+#   min_delta: 0.0001
+#   mode: "min"
+# datasets:
+#   - jamendo
+#   - dmdd

inference/input/test.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22a56123f5adb9d061d4ab80a97aae12c84937d86a5042343c05e108b4e9fdda
+size 8195178

inference/temp_out/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file