diff --git a/process.py b/process.py
index a629d5b..eaf0348 100644
--- a/process.py
+++ b/process.py
@@ -63,22 +63,42 @@ def resample_audios(load_folder: Path, save_folder: Path, logging, resample_rat
     for idx, kwarg in enumerate(kwarg_list):
         pool_func(**kwarg)
 
-def download_audio(save_folder: Path, file_name: Path, link: str):
+def download_audio(save_folder: Path, file_name: Path, link: str, use_alt_links: bool = False):
     """
     download one audio file.
     Inputs:
         save_folder: folder to save audio files
-        file_path: location to save the audo file
+        file_name: name of the audio file to create inside save_folder
         link: link to download the audio file
+        use_alt_links: for bigsoundbank links, download the AIFF variant of the link and convert it to WAV
     """
 
     audio_save_path = os.path.join(save_folder,file_name)
+    base_name = os.path.splitext(file_name)[0]
     try:
-        wget.download(link, audio_save_path)
+        if use_alt_links and isinstance(link, str) and 'bigsoundbank.com' in link:
+            # Alternative source: same URL with the wav folder/extension swapped for aiff.
+            aiff_link = link.replace('/UPLOAD/wav/', '/UPLOAD/aiff/').replace('.wav', '.aiff')
+            tmp_path = os.path.join(save_folder, f"{base_name}.aiff")
+            wget.download(aiff_link, tmp_path)
+            logging.info(f'Downloaded {aiff_link} -> {tmp_path}')
+            try:
+                data, sr = sf.read(tmp_path)
+                sf.write(audio_save_path, data, sr, 'PCM_24')
+                logging.info(f'Converted {tmp_path} -> {audio_save_path}')
+            except Exception as conv_e:
+                logging.info(f'Failed to convert {tmp_path} to WAV: {conv_e}')
+            finally:
+                # Drop the intermediate AIFF whether or not the conversion succeeded.
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+        else:
+            wget.download(link, audio_save_path)
+            logging.info(f'Downloaded {link} -> {audio_save_path}')
     except Exception as e:
         logging.info(f'File {link} could not be downloaded because of error {e}')
 
-def download_audios(csv_path: Path, save_folder: Path, logging, processes):
+def download_audios(csv_path: Path, save_folder: Path, logging, processes, use_alt_links: bool = False):
     """
     Download all audio files listed in fname.csv
     Inputs:
@@ -87,6 +107,7 @@ def download_audios(csv_path: Path, save_folder: Path, logging, processes):
         logging: Logging module containing information about the progress
                 of the code
         processes: Number of processes downloading audio content at the same time
+        use_alt_links: forwarded to download_audio for every file (AIFF download + WAV conversion for bigsoundbank links)
     """
     fname = pd.read_csv(csv_path)
     download_links = list(fname['download_link'])
@@ -98,6 +119,7 @@ def download_audios(csv_path: Path, save_folder: Path, logging, processes):
             "save_folder": save_folder,
             "file_name": file_names[i],
             "link": link,
+            "use_alt_links": use_alt_links,
         })
 
     pool_func = download_audio
@@ -115,6 +137,7 @@ def main():
     parser.add_argument("--save_folder_path", type=Path, required=True)
     parser.add_argument("--resample_rate", type=int, default=44100, choices=[44100,16000,22050,32000,44100,44800])
     parser.add_argument("--processes", type=int, default=1)
+    parser.add_argument("--use_alt_links", action='store_true', help="download bigsoundbank files from their AIFF links and convert them to WAV")
 
     args = parser.parse_args()
 
@@ -133,7 +156,7 @@ def main():
 
     # Download files
     logging.info('Starting to download files')
-    download_audios(args.csv_path, download_folder_path, logging, args.processes)
+    download_audios(args.csv_path, download_folder_path, logging, args.processes, args.use_alt_links)
 
     # Resample files
     logging.info('Starting to resample files')