diff --git a/slurm-gcp/basic.tfvars b/slurm-gcp/basic.tfvars index 26885b2..41e8050 100644 --- a/slurm-gcp/basic.tfvars +++ b/slurm-gcp/basic.tfvars @@ -81,9 +81,9 @@ login_disk_size_gb = 20 partitions = [ { name = "debug" - machine_type = "n1-standard-2" + machine_type = "c2-standard-4" static_node_count = 0 - max_node_count = 10 + max_node_count = 5 zone = "us-central1-c" image = "projects/schedmd-slurm-public/global/images/family/schedmd-slurm-20-11-4-hpc-centos-7" image_hyperthreads = false diff --git a/slurm-gcp/scripts/controller.sh b/slurm-gcp/scripts/controller.sh new file mode 100644 index 0000000..a8eed2d --- /dev/null +++ b/slurm-gcp/scripts/controller.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Run on class-controller node. +# gcloud compute ssh class-controller -- < setup.sh + +echo "=== setup.sh $(hostname) $(date)" + +# TODO: wait for slurm to configure +# MOTD displays: +# *** Slurm is currently being configured in the background. *** + +echo "+++ configure" + +CONTROLLER=$(hostname) +K8S=class-container-cluster + +export ZONE=$(gcloud compute instances list --filter="name=('$CONTROLLER')" --format='value(zone)') + +sudo yum install -y kubectl +gcloud container clusters get-credentials --zone=$ZONE $K8S + +kubectl get nodes + +echo "+++ spack" +sudo yum install -y epel-release +sudo yum install -y git python3 ca-certificates procps curl unzip jq +sudo yum install -y gcc gcc-c++ make patch file bzip2 xz + +sudo install -dv /apps/spack +sudo chown $USER /apps/spack +git clone --depth=1 https://github.com/spack/spack.git /apps/spack + + +NODES=$(sinfo -p debug -h -Onodes) +CPUS=$(sinfo -p debug -h -Ocpus) + +cat > build-spack.sh <