From 41e7f10d22daf9af1a6294b0a92970fbf182ef86 Mon Sep 17 00:00:00 2001 From: bhandarivijay Date: Wed, 4 Mar 2026 11:16:15 +0000 Subject: [PATCH] chore: Migrate gsutil usage to gcloud storage --- benchmarks/benchmarkUtil.py | 2 +- benchmarks/trigger_bigbench_benchmark.py | 2 +- benchmarks/trigger_hibench_benchmark.py | 2 +- gcloud/bin/audit-dpgce-create | 4 ++-- gcloud/bin/audit-dpgce-destroy | 4 ++-- gcloud/bin/audit-private-create | 4 ++-- gcloud/bin/audit-private-destroy | 4 ++-- gcloud/bin/recreate-dpgce | 7 +++---- gcloud/bin/recreate-dpgke | 3 +-- gcloud/lib/dataproc/jobs.sh | 2 +- gcloud/lib/gcp/gcs.sh | 20 ++++++++++---------- gcloud/lib/shared-functions.sh | 10 +++++----- spark-tensorflow/gcloud-tests/test-tf-tsv.sh | 2 +- spark-translate/README.md | 6 +++--- 14 files changed, 35 insertions(+), 37 deletions(-) diff --git a/benchmarks/benchmarkUtil.py b/benchmarks/benchmarkUtil.py index d7bfcf3f..1c259ca5 100644 --- a/benchmarks/benchmarkUtil.py +++ b/benchmarks/benchmarkUtil.py @@ -57,7 +57,7 @@ def upload_config_to_gs(self, scenario): experiment_name, scenario.name, self.clusterName) - command = "gsutil cp {} {} ".format(scenario.config_file_name, + command = "gcloud storage cp {} {} ".format(scenario.config_file_name, scenario_destination_bucket_path) execute_shell(command) diff --git a/benchmarks/trigger_bigbench_benchmark.py b/benchmarks/trigger_bigbench_benchmark.py index e106f2b6..6bbaada9 100644 --- a/benchmarks/trigger_bigbench_benchmark.py +++ b/benchmarks/trigger_bigbench_benchmark.py @@ -48,7 +48,7 @@ def upload_results(): for file in os.listdir('/Big-Data-Benchmark-for-Big-Bench/logs/'): if file.__contains__(".csv") or file.__contains__(".zip"): output_path = "{}/{}/{}/".format(sys.argv[1], sys.argv[2], cluster_name) - command = "gsutil cp /Big-Data-Benchmark-for-Big-Bench/logs/{} {}{}" \ + command = "gcloud storage cp /Big-Data-Benchmark-for-Big-Bench/logs/{} {}{}" \ .format(file, output_path, file) execute_shell(command) diff --git a/benchmarks/trigger_hibench_benchmark.py b/benchmarks/trigger_hibench_benchmark.py index 132e7db1..5dba58ab 100644 --- a/benchmarks/trigger_hibench_benchmark.py +++ b/benchmarks/trigger_hibench_benchmark.py @@ -47,7 +47,7 @@ def upload_results(): attributes/dataproc-cluster-name") \ .read() output_path = "{}/{}/{}/hibench.report".format(sys.argv[1], sys.argv[2], cluster_name) - cmd = "gsutil cp /HiBench/report/hibench.report {}".format(output_path) + cmd = "gcloud storage cp /HiBench/report/hibench.report {}".format(output_path) execute_shell(cmd) diff --git a/gcloud/bin/audit-dpgce-create b/gcloud/bin/audit-dpgce-create index 72a12e6a..ba049144 100644 --- a/gcloud/bin/audit-dpgce-create +++ b/gcloud/bin/audit-dpgce-create @@ -108,8 +108,8 @@ check_exists "Main Subnet ${SUBNET}" "gcloud compute networks subnets describe ' check_exists "Service Account ${GSA}" "gcloud iam service-accounts describe '${GSA}' --project='${PROJECT_ID}'" # 4. GCS Buckets -check_exists "GCS Staging Bucket gs://${BUCKET}" "gsutil ls -b 'gs://${BUCKET}'" -check_exists "GCS Temp Bucket gs://${TEMP_BUCKET}" "gsutil ls -b 'gs://${TEMP_BUCKET}'" +check_exists "GCS Staging Bucket gs://${BUCKET}" "gcloud storage ls --buckets 'gs://${BUCKET}'" +check_exists "GCS Temp Bucket gs://${TEMP_BUCKET}" "gcloud storage ls --buckets 'gs://${TEMP_BUCKET}'" # 5. Cloud Router check_exists "Cloud Router ${ROUTER_NAME}" "gcloud compute routers describe '${ROUTER_NAME}' --region='${REGION}' --project='${PROJECT_ID}'" diff --git a/gcloud/bin/audit-dpgce-destroy b/gcloud/bin/audit-dpgce-destroy index bf08ff25..5943bc78 100644 --- a/gcloud/bin/audit-dpgce-destroy +++ b/gcloud/bin/audit-dpgce-destroy @@ -63,8 +63,8 @@ check_resource "Main Subnet" \ check_resource_exact "VPC Network ${NETWORK}" "gcloud compute networks describe \"${NETWORK}\" --project=\"${PROJECT_ID}\"" # 8. GCS Buckets (Optional without --force) -check_resource_exact "GCS Staging Bucket gs://${BUCKET}" "gsutil ls -b 'gs://${BUCKET}'" true -check_resource_exact "GCS Temp Bucket gs://${TEMP_BUCKET}" "gsutil ls -b 'gs://${TEMP_BUCKET}'" true +check_resource_exact "GCS Staging Bucket gs://${BUCKET}" "gcloud storage ls --buckets 'gs://${BUCKET}'" true +check_resource_exact "GCS Temp Bucket gs://${TEMP_BUCKET}" "gcloud storage ls --buckets 'gs://${TEMP_BUCKET}'" true echo -e "\nAudit complete." echo -e "[${YELLOW}Pass*${NC}] indicates the resource was not found (which is expected after destroy)." diff --git a/gcloud/bin/audit-private-create b/gcloud/bin/audit-private-create index 77284a05..7a47de07 100644 --- a/gcloud/bin/audit-private-create +++ b/gcloud/bin/audit-private-create @@ -38,8 +38,8 @@ check_exists "SWP Subnet ${SWP_SUBNET}" "gcloud compute networks subnets describ check_exists "Service Account ${GSA}" "gcloud iam service-accounts describe '${GSA}' --project='${PROJECT_ID}'" # 4. GCS Buckets -check_exists "GCS Staging Bucket gs://${BUCKET}" "gsutil ls -b 'gs://${BUCKET}'" -check_exists "GCS Temp Bucket gs://${TEMP_BUCKET}" "gsutil ls -b 'gs://${TEMP_BUCKET}'" +check_exists "GCS Staging Bucket gs://${BUCKET}" "gcloud storage ls --buckets 'gs://${BUCKET}'" +check_exists "GCS Temp Bucket gs://${TEMP_BUCKET}" "gcloud storage ls --buckets 'gs://${TEMP_BUCKET}'" # 5. SWP Certificate Components SUFFIX=${RESOURCE_SUFFIX} diff --git a/gcloud/bin/audit-private-destroy b/gcloud/bin/audit-private-destroy index ef2a09d7..1bd974e9 100644 --- a/gcloud/bin/audit-private-destroy +++ b/gcloud/bin/audit-private-destroy @@ -131,8 +131,8 @@ check_resource "Main Subnet" \ check_resource_exact "VPC Network ${NETWORK}" "gcloud compute networks describe \"${NETWORK}\" --project=\"${PROJECT_ID}\"" # 14. GCS Buckets (Optional without --force) -check_resource_exact "GCS Staging Bucket gs://${BUCKET}" "gsutil ls -b 'gs://${BUCKET}'" true -check_resource_exact "GCS Temp Bucket gs://${TEMP_BUCKET}" "gsutil ls -b 'gs://${TEMP_BUCKET}'" true +check_resource_exact "GCS Staging Bucket gs://${BUCKET}" "gcloud storage ls --buckets 'gs://${BUCKET}'" true +check_resource_exact "GCS Temp Bucket gs://${TEMP_BUCKET}" "gcloud storage ls --buckets 'gs://${TEMP_BUCKET}'" true echo -e "\nAudit complete." echo -e "[${YELLOW}Pass*${NC}] indicates the resource was not found (which is expected after destroy)." diff --git a/gcloud/bin/recreate-dpgce b/gcloud/bin/recreate-dpgce index afd1c6b0..d32818cf 100755 --- a/gcloud/bin/recreate-dpgce +++ b/gcloud/bin/recreate-dpgce @@ -21,9 +21,9 @@ source lib/gcp/iam.sh # Copy latest initialization action scripts #echo -n "copying actions to gcs bucket..." -#gsutil -m cp \ -# -L action-update.log \ -# -r init/* gs://${BUCKET}/dataproc-initialization-actions +#gcloud storage cp \ +# --manifest-path action-update.log \ +# --recursive init/* gs://${BUCKET}/dataproc-initialization-actions #if [[ $? == 0 ]]; then # echo "done" #else @@ -39,4 +39,3 @@ create_dpgce_cluster echo "========================================" echo "General Purpose DPGCE Cluster re-created" echo "========================================" - diff --git a/gcloud/bin/recreate-dpgke b/gcloud/bin/recreate-dpgke index 8756ae90..1c410221 100755 --- a/gcloud/bin/recreate-dpgke +++ b/gcloud/bin/recreate-dpgke @@ -19,7 +19,7 @@ source lib/env.sh source lib/gke.sh # Assuming gke functions are here # Copy latest initialization action scripts -gsutil -m cp -r init/* gs://${BUCKET}/dataproc-initialization-actions +gcloud storage cp --recursive init/* gs://${BUCKET}/dataproc-initialization-actions # re-create normal dataproc cluster delete_gke_cluster @@ -28,4 +28,3 @@ create_gke_cluster echo "========================" echo "DPGKE Cluster re-created" echo "========================" - diff --git a/gcloud/lib/dataproc/jobs.sh b/gcloud/lib/dataproc/jobs.sh index 0b1a1a00..e4001661 100644 --- a/gcloud/lib/dataproc/jobs.sh +++ b/gcloud/lib/dataproc/jobs.sh @@ -29,7 +29,7 @@ function diagnose () { DIAG_URL=$(cat "${REPRO_TMPDIR}/${log_file}" | perl -ne 'print if m{^gs://.*/diagnostic.tar.gz\s*$}') if [[ -n "${DIAG_URL}" ]]; then print_status " Downloading ${DIAG_URL}..." - if run_gcloud "download_diagnose.log" gsutil cp -q "${DIAG_URL}" "${REPRO_TMPDIR}/"; then + if run_gcloud "download_diagnose.log" gcloud storage cp "${DIAG_URL}" "${REPRO_TMPDIR}/"; then report_result "Pass" local diag_file="${REPRO_TMPDIR}/$(basename ${DIAG_URL})" print_status " Running drproc on ${diag_file}..." diff --git a/gcloud/lib/gcp/gcs.sh b/gcloud/lib/gcp/gcs.sh index edf24a4c..97effc2e 100644 --- a/gcloud/lib/gcp/gcs.sh +++ b/gcloud/lib/gcp/gcs.sh @@ -12,8 +12,8 @@ function create_bucket () { print_status "Creating GCS Staging Bucket gs://${BUCKET}..." local log_file="create_bucket_${BUCKET}.log" - if ! gsutil ls -b "gs://${BUCKET}" > /dev/null 2>&1 ; then - if run_gcloud "${log_file}" gsutil mb -l ${REGION} gs://${BUCKET}; then + if ! gcloud storage ls --buckets "gs://${BUCKET}" > /dev/null 2>&1 ; then + if run_gcloud "${log_file}" gcloud storage buckets create --location ${REGION} gs://${BUCKET}; then report_result "Created" else report_result "Fail" @@ -24,7 +24,7 @@ function create_bucket () { fi # Grant SA permissions on BUCKET print_status " Granting Storage Admin on gs://${BUCKET}..." - if run_gcloud "${log_file}" gsutil iam ch "serviceAccount:${GSA}:roles/storage.admin" "gs://${BUCKET}"; then + if run_gcloud "${log_file}" gcloud storage buckets add-iam-policy-binding "gs://${BUCKET}" --member="serviceAccount:${GSA}" --role="roles/storage.admin"; then report_result "Pass" else report_result "Fail" @@ -32,8 +32,8 @@ function create_bucket () { print_status "Creating GCS Temp Bucket gs://${TEMP_BUCKET}..." local temp_log_file="create_bucket_${TEMP_BUCKET}.log" - if ! gsutil ls -b "gs://${TEMP_BUCKET}" > /dev/null 2>&1 ; then - if run_gcloud "${temp_log_file}" gsutil mb -l ${REGION} gs://${TEMP_BUCKET}; then + if ! gcloud storage ls --buckets "gs://${TEMP_BUCKET}" > /dev/null 2>&1 ; then + if run_gcloud "${temp_log_file}" gcloud storage buckets create --location ${REGION} gs://${TEMP_BUCKET}; then report_result "Created" else report_result "Fail" @@ -44,7 +44,7 @@ function create_bucket () { fi # Grant SA permissions on TEMP_BUCKET print_status " Granting Storage Admin on gs://${TEMP_BUCKET}..." - if run_gcloud "${temp_log_file}" gsutil iam ch "serviceAccount:${GSA}:roles/storage.admin" "gs://${TEMP_BUCKET}"; then + if run_gcloud "${temp_log_file}" gcloud storage buckets add-iam-policy-binding "gs://${TEMP_BUCKET}" --member="serviceAccount:${GSA}" --role="roles/storage.admin"; then report_result "Pass" else report_result "Fail" @@ -54,7 +54,7 @@ function create_bucket () { if [[ -d init ]] ; then print_status "Copying init scripts to ${INIT_ACTIONS_ROOT}..." local cp_log="copy_init_scripts.log" - if run_gcloud "${cp_log}" gsutil -m cp -r "init/*" "${INIT_ACTIONS_ROOT}/"; then + if run_gcloud "${cp_log}" gcloud storage cp --recursive "init/*" "${INIT_ACTIONS_ROOT}/"; then report_result "Pass" else report_result "Fail" @@ -66,8 +66,8 @@ function create_bucket () { function delete_bucket () { print_status "Deleting GCS Bucket gs://${BUCKET}..." local log_file="delete_bucket_${BUCKET}.log" - if gsutil ls -b "gs://${BUCKET}" > /dev/null 2>&1; then - if run_gcloud "${log_file}" gsutil -m rm -r "gs://${BUCKET}"; then + if gcloud storage ls --buckets "gs://${BUCKET}" > /dev/null 2>&1; then + if run_gcloud "${log_file}" gcloud storage rm --recursive "gs://${BUCKET}"; then report_result "Deleted" remove_sentinel "create_bucket" "done" else @@ -76,5 +76,5 @@ function delete_bucket () { else report_result "Not Found" fi - # gsutil -m rm -r "gs://${TEMP_BUCKET}" > /dev/null 2>&1 || true # huge cache here, not so great to lose it + # gcloud storage rm --recursive "gs://${TEMP_BUCKET}" > /dev/null 2>&1 || true # huge cache here, not so great to lose it } diff --git a/gcloud/lib/shared-functions.sh b/gcloud/lib/shared-functions.sh index 6e72a5f0..32599fc5 100644 --- a/gcloud/lib/shared-functions.sh +++ b/gcloud/lib/shared-functions.sh @@ -1029,12 +1029,12 @@ source lib/database-functions.sh source lib/net-functions.sh function create_bucket () { - if gsutil ls -b "gs://${BUCKET}" ; then + if gcloud storage ls --buckets "gs://${BUCKET}" ; then echo "bucket already exists, skipping creation." return fi set -x - gsutil mb -l ${REGION} gs://${BUCKET} + gcloud storage buckets create --location=${REGION} gs://${BUCKET} set +x echo "===================" @@ -1045,7 +1045,7 @@ function create_bucket () { if [ -d init ] then set -x - gsutil -m cp -r init/* gs://${BUCKET}/dataproc-initialization-actions + gcloud storage cp --recursive init/* gs://${BUCKET}/dataproc-initialization-actions set +x fi @@ -1057,7 +1057,7 @@ function create_bucket () { function delete_bucket () { set -x - gsutil -m rm -r gs://${BUCKET} + gcloud storage rm --recursive gs://${BUCKET} set +x echo "bucket removed" @@ -1144,7 +1144,7 @@ function diagnose { DIAG_URL=$(echo $DIAG_OUT | perl -ne 'print if m{^gs://.*/diagnostic.tar.gz\s*$}') mkdir -p tmp - gsutil cp -q ${DIAG_URL} tmp/ + gcloud storage cp ${DIAG_URL} tmp/ if [[ ! -f venv/${CLUSTER_NAME}/pyvenv.cfg ]]; then mkdir -p venv/ diff --git a/spark-tensorflow/gcloud-tests/test-tf-tsv.sh b/spark-tensorflow/gcloud-tests/test-tf-tsv.sh index a67359fa..d2258509 100755 --- a/spark-tensorflow/gcloud-tests/test-tf-tsv.sh +++ b/spark-tensorflow/gcloud-tests/test-tf-tsv.sh @@ -78,7 +78,7 @@ fi MODEL_NAME=test_model MODEL_VERSION=v$TEST_TIME -ORIGIN=$(gsutil ls "$JOB_DIR/**/saved_model.pb" | sed 's/\(.\)saved_model.pb/\1/g') +ORIGIN=$(gcloud storage ls "$JOB_DIR/**/saved_model.pb" | sed 's/\(.\)saved_model.pb/\1/g') echo "Training succeeded. Creating model from saved model at $ORIGIN ..." diff --git a/spark-translate/README.md b/spark-translate/README.md index 7916e29c..1dce9646 100644 --- a/spark-translate/README.md +++ b/spark-translate/README.md @@ -37,12 +37,12 @@ Google's Translation API and running on Cloud Dataproc. 4. Create a bucket: ``` - gsutil mb gs://$PROJECT-bucket + gcloud storage buckets create gs://$PROJECT-bucket ``` 5. Upload `words.txt` to the bucket: ``` - gsutil cp ../words.txt gs://$PROJECT-bucket + gcloud storage cp ../words.txt gs://$PROJECT-bucket ``` The file `words.txt` contains the following: ``` @@ -69,7 +69,7 @@ Google's Translation API and running on Cloud Dataproc. 8. Verify that the words have been translated: ``` - gsutil cat gs://$PROJECT-bucket/translated-fr/part-* + gcloud storage cat gs://$PROJECT-bucket/translated-fr/part-* ``` The output is: ```