Spaces:

Tonic
/

audiocraft

Running on Zero

App Files Files Community

Tonic committed 8 days ago

Commit

91d712c

unverified ·

1 Parent(s): c1b6b5f

add chord mapping

Browse files

Files changed (4) hide show

build_chord_maps.py +92 -0
extract_chords.py +73 -0
main.py +83 -1
requirements.txt +1 -0

build_chord_maps.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+import os
+import pickle
+from tqdm import tqdm
+import argparse
+def parse_args(argv=None):
+    """Parse the command-line options of the chord-map builder.
+
+    Args:
+        argv: Optional list of argument strings. ``None`` (the default) makes
+            argparse read ``sys.argv[1:]``, i.e. the original behaviour.
+
+    Returns:
+        argparse.Namespace with ``chords_folder`` (required) plus
+        ``output_directory`` and ``path_to_pre_defined_map``, which both
+        default to the empty string.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--chords_folder', type=str, required=True,
+                        help='path to directory containing parsed chords files')
+    # Adjacent string literals replace the backslash continuation inside the
+    # literal, which embedded the source indentation into the help text.
+    parser.add_argument('--output_directory', type=str, required=False, default='',
+                        help='path to output directory to generate code maps to, '
+                             'if not given - chords_folder would be used')
+    parser.add_argument('--path_to_pre_defined_map', type=str, required=False, default='',
+                        help='for evaluation purpose, use pre-defined chord-to-index map')
+    return parser.parse_args(argv)
+def get_chord_dict(chord_folder: str):
+    """Load every ``*.chords`` file of a folder and number the chords found.
+
+    Args:
+        chord_folder: directory containing pickled chord files. Each file is
+            unpickled and iterated as 2-item entries whose first item is the
+            chord label.
+
+    Returns:
+        Tuple ``(chord_dict, distinct_chords, chord_to_index)``:
+        ``chord_dict`` maps the file name without its ``.chords`` suffix to the
+        unpickled content, ``distinct_chords`` is the set of labels seen, and
+        ``chord_to_index`` numbers the labels in order of first appearance,
+        with ``"UNK"`` receiving the next free index.
+    """
+    suffix = ".chords"
+    chord_dict = {}
+    chord_to_index = {}  # Mapping between chord and index
+    # os.listdir returns entries in arbitrary order; sorting makes the index
+    # assignment reproducible between runs and machines.
+    for filename in tqdm(sorted(os.listdir(chord_folder))):
+        if not filename.endswith(suffix):
+            continue
+        # Strip only the suffix: split(".")[0] mapped both "a.b.chords" and
+        # "a.c.chords" to "a", so one silently overwrote the other.
+        idx = filename[:-len(suffix)]
+        # NOTE: pickle.load can execute arbitrary code; only point this at
+        # chord files produced by a trusted extraction run.
+        with open(os.path.join(chord_folder, filename), "rb") as file:
+            chord_data = pickle.load(file)
+        for chord, _ in chord_data:
+            # len() is evaluated before insertion, so a new chord gets the
+            # next unused index and a known chord keeps its index.
+            chord_to_index.setdefault(chord, len(chord_to_index))
+        chord_dict[idx] = chord_data
+    # Snapshot the labels before the UNK entry is added.
+    distinct_chords = set(chord_to_index)
+    chord_to_index["UNK"] = len(chord_to_index)
+    return chord_dict, distinct_chords, chord_to_index
+def get_predefined_chord_to_index_map(path_to_chords_to_index_map: str):
+    """Build a loader that pairs a folder's chord files with an existing map.
+
+    Args:
+        path_to_chords_to_index_map: path of a pickled chord-to-index
+            dictionary. It is read when the returned function is called, not
+            when this factory is called.
+
+    Returns:
+        A function ``inner(chord_folder)`` returning the tuple
+        ``(chord_dict, distinct_chords, chords_to_index)``; ``distinct_chords``
+        is the key set of the pre-defined map.
+    """
+    suffix = ".chords"
+    def inner(chord_folder: str):
+        # Context manager so the map file is closed deterministically; the
+        # bare pickle.load(open(...)) left the handle to the garbage collector.
+        # NOTE: pickle.load can execute arbitrary code; use trusted files only.
+        with open(path_to_chords_to_index_map, "rb") as map_file:
+            chords_to_index = pickle.load(map_file)
+        distinct_chords = set(chords_to_index.keys())
+        chord_dict = {}
+        for filename in tqdm(os.listdir(chord_folder), desc=f'iterating: {chord_folder}'):
+            if not filename.endswith(suffix):
+                continue
+            # Strip only the suffix so ids that contain dots stay distinct.
+            idx = filename[:-len(suffix)]
+            with open(os.path.join(chord_folder, filename), "rb") as file:
+                chord_data = pickle.load(file)
+            chord_dict[idx] = chord_data
+        return chord_dict, distinct_chords, chords_to_index
+    return inner
+if __name__ == "__main__":
+    # This script processes and maps chord data from a directory of parsed chords files,
+    # generating two output files: a combined chord dictionary and a chord-to-index mapping.
+    args = parse_args()
+    chord_folder = args.chords_folder
+    # An empty --output_directory means "write next to the input files".
+    output_dir = args.output_directory or chord_folder
+    # Create the destination up front; the dumps below would otherwise fail
+    # with FileNotFoundError when a new output directory is given.
+    os.makedirs(output_dir, exist_ok=True)
+    # Build a fresh map by default, or reuse a pre-defined one when a path is given.
+    func = get_chord_dict
+    if args.path_to_pre_defined_map != "":
+        func = get_predefined_chord_to_index_map(args.path_to_pre_defined_map)
+    chord_dict, distinct_chords, chord_to_index = func(chord_folder)
+    # Save the combined chord dictionary as a pickle file
+    combined_filename = os.path.join(output_dir, "combined_chord_dict.pkl")
+    with open(combined_filename, "wb") as file:
+        pickle.dump(chord_dict, file)
+    # Save the chord-to-index mapping as a pickle file
+    mapping_filename = os.path.join(output_dir, "chord_to_index_mapping.pkl")
+    with open(mapping_filename, "wb") as file:
+        pickle.dump(chord_to_index, file)
+    print("Number of distinct chords:", len(distinct_chords))
+    print("Chord dictionary:", chord_to_index)

extract_chords.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# Env - chords_extraction on devfair
+import pickle
+import argparse
+from chord_extractor.extractors import Chordino  # type: ignore
+from chord_extractor import clear_conversion_cache, LabelledChordSequence  # type: ignore
+import os
+from tqdm import tqdm
+def parse_args(argv=None):
+    """Parse the command-line options of the chord extraction script.
+
+    Args:
+        argv: Optional list of argument strings. ``None`` (the default) makes
+            argparse read ``sys.argv[1:]``, i.e. the original behaviour.
+
+    Returns:
+        argparse.Namespace with ``src_jsonl_file`` and ``target_output_dir``
+        (both required) and the boolean flag ``override`` (False unless
+        ``--override`` is passed).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--src_jsonl_file', type=str, required=True,
+                        help='abs path to .jsonl file containing list of absolute file paths seperated by new line')
+    parser.add_argument('--target_output_dir', type=str, required=True,
+                        help='target directory to save parsed chord files to, individual files will be saved inside')
+    parser.add_argument("--override", action="store_true")
+    return parser.parse_args(argv)
+def save_to_db_cb(tgt_dir: str):
+    """Create the callback that stores the chords extracted from one file.
+
+    Args:
+        tgt_dir: directory in which the ``<name>.chords`` pickles are written.
+
+    Returns:
+        A function taking one extraction result. It reads ``results.id`` (the
+        path of the source file) and ``results.sequence`` (items exposing
+        ``chord`` and ``timestamp``) and pickles the list of
+        ``(chord, timestamp)`` tuples to ``<tgt_dir>/<name>.chords``, where
+        ``<name>`` is the source file name without its ``.wav`` extension.
+        Results whose id does not end in ``.wav`` are reported and skipped.
+    """
+    # The annotation is quoted so it is never evaluated at runtime.
+    def inner(results: "LabelledChordSequence"):
+        # Split on the real file-name/extension boundary. The previous
+        # split(".wav") skipped every file whose directory name merely
+        # contained ".wav" and reported it only as "Something".
+        file_idx, extension = os.path.splitext(os.path.basename(results.id))
+        if extension != ".wav":
+            print(f"Skipping {results.id!r}: expected a path ending in '.wav'")
+            return
+        sequence = [(item.chord, item.timestamp) for item in results.sequence]
+        with open(os.path.join(tgt_dir, f"{file_idx}.chords"), "wb") as f:
+            # dump the object to the file
+            pickle.dump(sequence, f)
+    return inner
+if __name__ == "__main__":
+    # This script extracts chord data from a list of audio files using the Chordino extractor,
+    # and saves the extracted chords to individual files in a target directory.
+    args = parse_args()
+    # Printed after parsing so the message is true when it appears.
+    print("parsed args")
+    # The extraction callback opens files inside this directory, so make sure
+    # it exists before any work is queued.
+    os.makedirs(args.target_output_dir, exist_ok=True)
+    files_to_extract_from = list()
+    with open(args.src_jsonl_file, "r") as json_file:
+        for line in tqdm(json_file.readlines()):
+            # One plain path per line is expected (no JSON decoding is done).
+            # fpath = json.loads(line.replace("\n", ""))['path']
+            fpath = line.replace("\n", "")
+            if not fpath:
+                # Blank lines used to be queued as an empty path.
+                continue
+            if not args.override:
+                # Skip inputs whose .chords output already exists.
+                fname = fpath.split("/")[-1].replace(".wav", ".chords")
+                if os.path.exists(f"{args.target_output_dir}/{fname}"):
+                    continue
+            files_to_extract_from.append(fpath)
+    print(f"num files to parse: {len(files_to_extract_from)}")
+    chordino = Chordino()
+    # Optionally clear cache of file conversions (e.g. wav files that have been converted from midi)
+    clear_conversion_cache()
+    # Run bulk extraction
+    # NOTE(review): the worker counts (80/80) and cache size (400) are
+    # hard-coded; confirm they suit the machine this runs on.
+    res = chordino.extract_many(
+        files_to_extract_from,
+        callback=save_to_db_cb(args.target_output_dir),
+        num_extractors=80,
+        num_preprocessors=80,
+        max_files_in_cache=400,
+        stop_on_error=False,
+    )

main.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import spaces
 import logging
 import os
 from concurrent.futures import ProcessPoolExecutor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
@@ -23,7 +24,72 @@ MODEL = None
 MAX_BATCH_SIZE = 12
 INTERRUPTING = False
 # Wrap subprocess call to clean logs
 _old_call = sp.call
@@ -80,14 +146,30 @@ def load_model(version='facebook/jasco-chords-drums-400M'):
     print("Loading model", version)
     if MODEL is None or MODEL.name != version:
         MODEL = None
         try:
-            MODEL = JASCO.get_pretrained(version, device='cuda')
             MODEL.name = version
         except Exception as e:
             raise gr.Error(f"Error loading model: {str(e)}")
         if MODEL is None:
             raise gr.Error("Failed to load model")
     return MODEL
 @spaces.GPU

 import spaces
 import logging
 import os
+import pickle
 from concurrent.futures import ProcessPoolExecutor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 MAX_BATCH_SIZE = 12
 INTERRUPTING = False
+# Create the "models" directory next to this file as soon as the module is
+# imported; exist_ok=True makes repeated imports harmless.
+os.makedirs(os.path.join(os.path.dirname(__file__), "models"), exist_ok=True)
+def generate_chord_mappings():
+    """Write a small fixed chord-to-index table to the models directory.
+
+    The table is pickled to ``models/chord_to_index_mapping.pkl`` next to this
+    file, replacing any file already there.
+
+    Returns:
+        Path of the pickle file that was written.
+    """
+    models_dir = os.path.join(os.path.dirname(__file__), "models")
+    target = os.path.join(models_dir, "chord_to_index_mapping.pkl")
+    # Eight chord labels followed by the 'UNK' fallback, numbered in list order.
+    labels = ['N', 'C', 'Dm7', 'Am', 'F', 'D', 'Ab', 'Bb', 'UNK']
+    table = dict(zip(labels, range(len(labels))))
+    os.makedirs(models_dir, exist_ok=True)
+    with open(target, "wb") as handle:
+        pickle.dump(table, handle)
+    return target
+def create_default_chord_mapping():
+    """Return the built-in chord vocabulary as a label -> index dictionary.
+
+    Index 0 is 'N'. It is followed, root by root, by the plain, 'm', '7',
+    'maj7' and 'm7' variants of C, D, E, F, G, A, B, Ab and Bb. 'UNK' takes
+    the last index.
+    """
+    roots = ('C', 'D', 'E', 'F', 'G', 'A', 'B', 'Ab', 'Bb')
+    qualities = ('', 'm', '7', 'maj7', 'm7')
+    labels = ['N']
+    for root in roots:
+        labels.extend(root + quality for quality in qualities)
+    labels.append('UNK')
+    return dict(zip(labels, range(len(labels))))
+def initialize_chord_mapping():
+    """Make sure ``models/chord_to_index_mapping.pkl`` exists and return its path.
+
+    The ``models`` directory next to this file is created when missing. An
+    existing mapping file is left untouched; otherwise the default mapping
+    from ``create_default_chord_mapping`` is pickled into it.
+    """
+    models_dir = os.path.join(os.path.dirname(__file__), "models")
+    os.makedirs(models_dir, exist_ok=True)
+    target = os.path.join(models_dir, "chord_to_index_mapping.pkl")
+    if os.path.exists(target):
+        # Never overwrite a mapping that is already in place.
+        return target
+    default_table = create_default_chord_mapping()
+    with open(target, "wb") as handle:
+        pickle.dump(default_table, handle)
+    return target
+def validate_chord(chord, chord_mapping):
+    """Return ``chord`` when it is a key of ``chord_mapping``, otherwise ``'UNK'``."""
+    return chord if chord in chord_mapping else 'UNK'
+# Runs at import time: make sure the mapping pickle exists, then publish its
+# path through the process environment.
+# NOTE(review): presumably read by audiocraft; confirm that
+# AUDIOCRAFT_CHORD_MAPPING is actually consumed somewhere.
+mapping_file = initialize_chord_mapping()
+os.environ['AUDIOCRAFT_CHORD_MAPPING'] = mapping_file
+def chords_string_to_list(chords: str):
+    """Parse ``"(C, 0.0), (D, 2.0)"``-style text into ``(chord, time)`` tuples.
+
+    Square brackets and all whitespace are ignored. Each chord is passed
+    through ``validate_chord`` against the mapping pickled in
+    ``models/chord_to_index_mapping.pkl``, so labels missing from that
+    mapping come back as ``'UNK'``.
+
+    Args:
+        chords: comma-separated ``(chord, seconds)`` pairs, optionally wrapped
+            in ``[...]``.
+
+    Returns:
+        List of ``(chord, float_time)`` tuples; an empty list when the input
+        holds no pairs (``''``, ``'[]'`` or whitespace only).
+
+    Raises:
+        ValueError: if a pair has no time component or the time is not a number.
+    """
+    # Remove brackets and every whitespace character. Only ' ' used to be
+    # stripped, so pairs separated by newlines or tabs failed to parse.
+    cleaned = ''.join(chords.replace('[', '').replace(']', '').split())
+    if not cleaned:
+        # Covers '', '[]' and blank input; the latter two used to raise IndexError.
+        return []
+    # Drop the outermost parentheses, then split between consecutive pairs.
+    chrd_times = [x.split(',') for x in cleaned[1:-1].split('),(')]
+    for pair in chrd_times:
+        if len(pair) < 2:
+            raise ValueError(
+                f"Malformed chord entry {','.join(pair)!r}: expected '(chord, time)'"
+            )
+    # Load chord mapping
+    # NOTE: re-read on every call; the file is written locally by this app.
+    mapping_path = os.path.join(os.path.dirname(__file__), "models", "chord_to_index_mapping.pkl")
+    with open(mapping_path, 'rb') as f:
+        chord_mapping = pickle.load(f)
+    return [(validate_chord(x[0], chord_mapping), float(x[1])) for x in chrd_times]
 # Wrap subprocess call to clean logs
 _old_call = sp.call
     print("Loading model", version)
     if MODEL is None or MODEL.name != version:
         MODEL = None
+        # Setup model directory
+        model_dir = os.path.join(os.path.dirname(__file__), "models")
+        os.makedirs(model_dir, exist_ok=True)
+        # Generate and save chord mappings
+        chord_mapping_path = os.path.join(model_dir, "chord_to_index_mapping.pkl")
+        if not os.path.exists(chord_mapping_path):
+            chord_mapping_path = generate_chord_mappings()
         try:
+            # Initialize JASCO with the chord mapping path
+            MODEL = JASCO.get_pretrained(
+                version,
+                device='cuda',
+                chords_mapping_path=chord_mapping_path
+            )
             MODEL.name = version
         except Exception as e:
             raise gr.Error(f"Error loading model: {str(e)}")
         if MODEL is None:
             raise gr.Error("Failed to load model")
     return MODEL
 @spaces.GPU

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 numpy<2.0.0
 torch>=2.0.0
 transformers
 accelerate
 git+https://github.com/facebookresearch/audiocraft.git

 numpy<2.0.0
 torch>=2.0.0
+torchaudio
 transformers
 accelerate
 git+https://github.com/facebookresearch/audiocraft.git