"""Utility for downloading Piper voices.""" import json import logging import shutil from pathlib import Path from typing import Any, Dict, Iterable, Set, Tuple, Union from urllib.request import urlopen from .file_hash import get_file_hash URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}" _DIR = Path(__file__).parent _LOGGER = logging.getLogger(__name__) _SKIP_FILES = {"MODEL_CARD"} class VoiceNotFoundError(Exception): pass def get_voices( download_dir: Union[str, Path], update_voices: bool = False ) -> Dict[str, Any]: """Loads available voices from downloaded or embedded JSON file.""" download_dir = Path(download_dir) voices_download = download_dir / "voices.json" if update_voices: # Download latest voices.json voices_url = URL_FORMAT.format(file="voices.json") _LOGGER.debug("Downloading %s to %s", voices_url, voices_download) with urlopen(voices_url) as response, open( voices_download, "wb" ) as download_file: shutil.copyfileobj(response, download_file) # Prefer downloaded file to embedded voices_embedded = _DIR / "voices.json" voices_path = voices_download if voices_download.exists() else voices_embedded _LOGGER.debug("Loading %s", voices_path) with open(voices_path, "r", encoding="utf-8") as voices_file: return json.load(voices_file) def ensure_voice_exists( name: str, data_dirs: Iterable[Union[str, Path]], download_dir: Union[str, Path], voices_info: Dict[str, Any], ): assert data_dirs, "No data dirs" if name not in voices_info: raise VoiceNotFoundError(name) voice_info = voices_info[name] voice_files = voice_info["files"] files_to_download: Set[str] = set() for data_dir in data_dirs: data_dir = Path(data_dir) # Check sizes/hashes for file_path, file_info in voice_files.items(): if file_path in files_to_download: # Already planning to download continue file_name = Path(file_path).name if file_name in _SKIP_FILES: continue data_file_path = data_dir / file_name _LOGGER.debug("Checking %s", data_file_path) if not data_file_path.exists(): _LOGGER.debug("Missing %s", data_file_path) files_to_download.add(file_path) continue expected_size = file_info["size_bytes"] actual_size = data_file_path.stat().st_size if expected_size != actual_size: _LOGGER.warning( "Wrong size (expected=%s, actual=%s) for %s", expected_size, actual_size, data_file_path, ) files_to_download.add(file_path) continue expected_hash = file_info["md5_digest"] actual_hash = get_file_hash(data_file_path) if expected_hash != actual_hash: _LOGGER.warning( "Wrong hash (expected=%s, actual=%s) for %s", expected_hash, actual_hash, data_file_path, ) files_to_download.add(file_path) continue if (not voice_files) and (not files_to_download): raise ValueError(f"Unable to find or download voice: {name}") # Download missing files download_dir = Path(download_dir) for file_path in files_to_download: file_name = Path(file_path).name if file_name in _SKIP_FILES: continue file_url = URL_FORMAT.format(file=file_path) download_file_path = download_dir / file_name download_file_path.parent.mkdir(parents=True, exist_ok=True) _LOGGER.debug("Downloading %s to %s", file_url, download_file_path) with urlopen(file_url) as response, open( download_file_path, "wb" ) as download_file: shutil.copyfileobj(response, download_file) _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url) def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]: for data_dir in data_dirs: data_dir = Path(data_dir) onnx_path = data_dir / f"{name}.onnx" config_path = data_dir / f"{name}.onnx.json" if onnx_path.exists() and config_path.exists(): return onnx_path, config_path raise ValueError(f"Missing files for voice {name}")