From 98209b4bba3bc326c4fd65f1f8cc90e06f3c57ad Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Wed, 18 Feb 2026 10:42:45 +0000 Subject: [PATCH 1/2] chore: Migrate gsutil usage to gcloud storage --- .../single-node/templates/benchmark-configmap.yaml | 2 +- src/launchers/trtllm-launcher.sh | 2 +- src/utils/data_processing/waymo_dataset/README.md | 1 - .../waymo_dataset/waymo_perception_data_processor.py | 6 +++--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/helm-charts/a3ultra/trtllm-inference/single-node/templates/benchmark-configmap.yaml b/src/helm-charts/a3ultra/trtllm-inference/single-node/templates/benchmark-configmap.yaml index b81c3251..ce8b4ade 100644 --- a/src/helm-charts/a3ultra/trtllm-inference/single-node/templates/benchmark-configmap.yaml +++ b/src/helm-charts/a3ultra/trtllm-inference/single-node/templates/benchmark-configmap.yaml @@ -50,7 +50,7 @@ data: --kv_cache_free_gpu_mem_fraction 0.95 > $output_file cat $output_file - gsutil cp $output_file /gcs/benchmark_logs/ + gcloud storage cp $output_file /gcs/benchmark_logs/ rm -rf $engine_dir rm -f $dataset_file diff --git a/src/launchers/trtllm-launcher.sh b/src/launchers/trtllm-launcher.sh index 5e8ee091..b3805a34 100644 --- a/src/launchers/trtllm-launcher.sh +++ b/src/launchers/trtllm-launcher.sh @@ -213,7 +213,7 @@ run_benchmark() { fi cat $output_file - gsutil cp $output_file /gcs/benchmark_logs/trtllm/ + gcloud storage cp $output_file /gcs/benchmark_logs/trtllm/ rm -rf $engine_dir rm -f $dataset_file diff --git a/src/utils/data_processing/waymo_dataset/README.md b/src/utils/data_processing/waymo_dataset/README.md index 37345bc9..bcf7512b 100644 --- a/src/utils/data_processing/waymo_dataset/README.md +++ b/src/utils/data_processing/waymo_dataset/README.md @@ -111,4 +111,3 @@ print(processed_dataset[0]) - Make sure your GCP user or service account has `Storage Object Viewer` permissions on the `gs://waymo_open_dataset_v_2_0_1/` bucket. 3. 
**Corrupted Files**: If a specific Parquet file fails to process, it might be corrupted. The script is designed to be robust and will log an error and skip the corrupted segment, continuing with the rest of the data. - diff --git a/src/utils/data_processing/waymo_dataset/waymo_perception_data_processor.py b/src/utils/data_processing/waymo_dataset/waymo_perception_data_processor.py index 6837dc5c..97a01767 100644 --- a/src/utils/data_processing/waymo_dataset/waymo_perception_data_processor.py +++ b/src/utils/data_processing/waymo_dataset/waymo_perception_data_processor.py @@ -153,7 +153,7 @@ def _download_dataset_locally(input_dir: str): # If PARQUET_ID is empty, download all parquets in the directory source_for_gsutil = os.path.join(remote_path_item, "*.parquet") - gsutil_command = ["gsutil", "-m", "cp", "-r", source_for_gsutil, local_path_dir] + gsutil_command = ["gcloud", "storage", "cp", "--recursive", source_for_gsutil, local_path_dir] logger.info( f"[DATALOADER] Downloading dataset. Command: {' '.join(gsutil_command)}" @@ -164,9 +164,9 @@ def _download_dataset_locally(input_dir: str): ) logger.info(f"[DATALOADER] Successfully downloaded to {local_path_dir}.") if result.stdout: - logger.info(f"[DATALOADER] gsutil stdout: {result.stdout}") + logger.info(f"[DATALOADER] gcloud stdout: {result.stdout}") if result.stderr: # gsutil often prints status to stderr even on success - logger.info(f"[DATALOADER] gsutil stderr: {result.stderr}") + logger.info(f"[DATALOADER] gcloud stderr: {result.stderr}") except subprocess.CalledProcessError as e: logger.error( f"[Fatal][DATALOADER] Failed to download from {source_for_gsutil} to {local_path_dir}. 
" From 7b8189a12a7062c2f9c43156e758d5d2f779a112 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Wed, 18 Feb 2026 10:55:20 +0000 Subject: [PATCH 2/2] chore: update --- src/utils/data_processing/waymo_dataset/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/data_processing/waymo_dataset/README.md b/src/utils/data_processing/waymo_dataset/README.md index bcf7512b..8834355f 100644 --- a/src/utils/data_processing/waymo_dataset/README.md +++ b/src/utils/data_processing/waymo_dataset/README.md @@ -26,7 +26,7 @@ Before running the script, ensure you have the following prerequisites installed #### Google Cloud SDK -The `gsutil` command-line tool is required to download the dataset from Google Cloud Storage. +The `gcloud` command-line tool (which provides the `gcloud storage` commands) is required to download the dataset from Google Cloud Storage. 1. Install the Google Cloud SDK. 2. Authenticate with Google Cloud: @@ -103,7 +103,7 @@ print(processed_dataset[0]) ### 5. Common Issues -1. **`gsutil` Command Not Found**: This error occurs if the Google Cloud SDK is not installed or not in your system's `PATH`. Please follow the installation instructions in the Prerequisites section. +1. **`gcloud` Command Not Found**: This error occurs if the Google Cloud SDK is not installed or not in your system's `PATH`. Please follow the installation instructions in the Prerequisites section. 2. **GCS Access Denied / 401 Errors**: This indicates an authentication or permission issue. - Ensure you have registered for the Waymo dataset.