Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions process.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,42 @@ def resample_audios(load_folder: Path, save_folder: Path, logging, resample_rat
for idx, kwarg in enumerate(kwarg_list):
pool_func(**kwarg)

def download_audio(save_folder: Path, file_name: Path, link: str, use_alt_links: bool = False):
    """
    Download one audio file.

    Inputs:
        save_folder: folder to save audio files
        file_name: name of the audio file inside save_folder
        link: link to download the audio file
        use_alt_links: Whether to use alternative download links for bigsoundbank files

    When use_alt_links is set and the link points at bigsoundbank.com, the
    AIFF variant of the file is downloaded and converted to a 24-bit PCM file
    at the requested location. Every failure is logged and swallowed so that
    one bad link does not abort a batch of downloads; nothing is returned.
    """
    audio_save_path = os.path.join(save_folder, file_name)

    try:
        # Guard clause: the plain download is the common path.
        if not (use_alt_links and link and 'bigsoundbank.com' in link):
            wget.download(link, audio_save_path)
            logging.info(f'Downloaded {link} -> {audio_save_path}')
            return

        aiff_link = link.replace('/UPLOAD/wav/', '/UPLOAD/aiff/').replace('.wav', '.aiff')
        base_name = os.path.splitext(file_name)[0]
        tmp_path = os.path.join(save_folder, f"{base_name}.aiff")

        # wget.download returns the path it actually wrote; it may differ from
        # the requested one when a file of that name already exists, so use
        # the returned path rather than risk reading a stale leftover.
        tmp_path = wget.download(aiff_link, tmp_path) or tmp_path
        logging.info(f'Downloaded {aiff_link} -> {tmp_path}')

        try:
            data, sr = sf.read(tmp_path)
            sf.write(audio_save_path, data, sr, subtype='PCM_24')
            logging.info(f'Converted {tmp_path} -> {audio_save_path}')
        except Exception as conv_e:
            logging.info(f'Failed to convert {tmp_path} to WAV: {conv_e}')
        finally:
            # Always drop the intermediate AIFF, also after a failed
            # conversion, but never the output itself (the two paths coincide
            # when file_name already carries an .aiff extension).
            if tmp_path != audio_save_path and os.path.exists(tmp_path):
                os.remove(tmp_path)

    except Exception as e:
        # Best-effort boundary: log and continue with the next file.
        logging.info(f'File {link} could not be downloaded because of error {e}')

def download_audios(csv_path: Path, save_folder: Path, logging, processes):
def download_audios(csv_path: Path, save_folder: Path, logging, processes, use_alt_links: bool = False):
"""
Download all audio files listed in fname.csv
Inputs:
Expand All @@ -87,6 +107,7 @@ def download_audios(csv_path: Path, save_folder: Path, logging, processes):
logging: Logging module containing information about the progress of the code
processes: Number of processes downloading audio content at
the same time
use_alt_links: Whether to use alternative download links for bigsoundbank files
"""
fname = pd.read_csv(csv_path)
download_links = list(fname['download_link'])
Expand All @@ -98,6 +119,7 @@ def download_audios(csv_path: Path, save_folder: Path, logging, processes):
"save_folder": save_folder,
"file_name": file_names[i],
"link": link,
"use_alt_links": use_alt_links,
})

pool_func = download_audio
Expand All @@ -115,6 +137,7 @@ def main():
parser.add_argument("--save_folder_path", type=Path, required=True)
parser.add_argument("--resample_rate", type=int, default=44100, choices=[44100,16000,22050,32000,44100,44800])
parser.add_argument("--processes", type=int, default=1)
parser.add_argument("--use_alt_links", action='store_true')

args = parser.parse_args()

Expand All @@ -133,7 +156,7 @@ def main():

# Download files
logging.info('Starting to download files')
download_audios(args.csv_path, download_folder_path, logging, args.processes)
download_audios(args.csv_path, download_folder_path, logging, args.processes, args.use_alt_links)

# Resample files
logging.info('Starting to resample files')
Expand Down