diff --git a/gcp-deployment/gcp-cleanup-guide.md b/gcp-deployment/gcp-cleanup-guide.md new file mode 100644 index 0000000..a48dd4b --- /dev/null +++ b/gcp-deployment/gcp-cleanup-guide.md @@ -0,0 +1,55 @@ +# GCP and Kubernetes Resource Cleanup Guide + +## 1. Delete Kubernetes Resources + +```bash +kubectl delete cronjob data-pipeline-sequence +kubectl delete job,deployment,service --all +kubectl delete secret db-credentials +``` + +## 2. Delete GKE Cluster + +```bash +gcloud container clusters delete weather-cluster --zone=us-central1-a +``` + +## 3. Delete Container Images + +```bash +# List images +gcloud container images list + +# Delete images (repeat for each image) +gcloud container images list-tags gcr.io/YOUR_PROJECT_ID/IMAGE_NAME --format='get(digest)' | xargs -I {} gcloud container images delete gcr.io/YOUR_PROJECT_ID/IMAGE_NAME@{} --force-delete-tags --quiet +``` + +## 4. Clean Up Other Resources + +```bash +# Delete disks +gcloud compute disks delete DISK_NAME --zone=ZONE + +# Delete firewall rules +gcloud compute firewall-rules delete RULE_NAME + +# Delete load balancers +gcloud compute forwarding-rules delete RULE_NAME --global + +# Delete static IPs +gcloud compute addresses delete ADDRESS_NAME --region=REGION + +# Delete storage buckets +gsutil rm -r gs://BUCKET_NAME + +# Delete service accounts +gcloud iam service-accounts delete SERVICE_ACCOUNT_EMAIL +``` + +## 5. Delete Project (Optional, Use with Caution) + +```bash +gcloud projects delete YOUR_PROJECT_ID +``` + +Replace placeholders with actual resource names. Double-check the Google Cloud Console to ensure all resources are removed. 
diff --git a/gcp-deployment/k8s-artifacts/data-pipeline-job.yaml b/gcp-deployment/k8s-artifacts/data-pipeline-job.yaml new file mode 100644 index 0000000..eefcdae --- /dev/null +++ b/gcp-deployment/k8s-artifacts/data-pipeline-job.yaml @@ -0,0 +1,40 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: data-pipeline-sequence +spec: + schedule: "0 0 31 2 *" # This schedule will effectively never run automatically + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + volumes: + - name: shared-data + emptyDir: {} + initContainers: + - name: extract + image: gcr.io/${PROJECT_ID}/data-pipeline-extract:latest + envFrom: + - secretRef: + name: db-credentials + volumeMounts: + - name: shared-data + mountPath: /data + - name: load + image: gcr.io/${PROJECT_ID}/data-pipeline-load:latest + envFrom: + - secretRef: + name: db-credentials + volumeMounts: + - name: shared-data + mountPath: /data + containers: + - name: transform + image: gcr.io/${PROJECT_ID}/data-pipeline-transform:latest + envFrom: + - secretRef: + name: db-credentials + volumeMounts: + - name: shared-data + mountPath: /data \ No newline at end of file diff --git a/gcp-deployment/k8s-artifacts/flask-app-deployment.yaml b/gcp-deployment/k8s-artifacts/flask-app-deployment.yaml new file mode 100644 index 0000000..70a1bb0 --- /dev/null +++ b/gcp-deployment/k8s-artifacts/flask-app-deployment.yaml @@ -0,0 +1,22 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flask-app +spec: + replicas: 2 + selector: + matchLabels: + app: flask-app + template: + metadata: + labels: + app: flask-app + spec: + containers: + - name: flask-app + image: gcr.io/${PROJECT_ID}/flask-app:latest + envFrom: + - secretRef: + name: db-credentials + ports: + - containerPort: 5000 \ No newline at end of file diff --git a/gcp-deployment/k8s-artifacts/flask-app-service.yaml b/gcp-deployment/k8s-artifacts/flask-app-service.yaml new file mode 100644 index 0000000..c77660e --- /dev/null +++ 
b/gcp-deployment/k8s-artifacts/flask-app-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: flask-app +spec: + type: LoadBalancer # Uses LoadBalancer for public access + selector: + app: flask-app + ports: + - protocol: TCP + port: 80 + targetPort: 5000 \ No newline at end of file diff --git a/gcp-deployment/k8s-artifacts/postgres-deployment.yaml b/gcp-deployment/k8s-artifacts/postgres-deployment.yaml new file mode 100644 index 0000000..042c39b --- /dev/null +++ b/gcp-deployment/k8s-artifacts/postgres-deployment.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:13 + env: + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: db-credentials + key: DB_PASSWORD + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: db-credentials + key: DB_USER + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: db-credentials + key: DB_NAME + - name: DB_HOST + valueFrom: + secretKeyRef: + name: db-credentials + key: DB_HOST + - name: DB_PORT + valueFrom: + secretKeyRef: + name: db-credentials + key: DB_PORT + ports: + - containerPort: 5432 \ No newline at end of file diff --git a/gcp-deployment/k8s-artifacts/postgres-service.yaml b/gcp-deployment/k8s-artifacts/postgres-service.yaml new file mode 100644 index 0000000..e8e2648 --- /dev/null +++ b/gcp-deployment/k8s-artifacts/postgres-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgres +spec: + type: ClusterIP # Uses ClusterIP instead of LoadBalancer because we don't want to expose the database to the public internet + selector: + app: postgres + ports: + - protocol: TCP + port: 5432 + targetPort: 5432 \ No newline at end of file diff --git a/gcp-deployment/weather-data-pipeline-deployment-guide.md 
b/gcp-deployment/weather-data-pipeline-deployment-guide.md new file mode 100644 index 0000000..a95c8cc --- /dev/null +++ b/gcp-deployment/weather-data-pipeline-deployment-guide.md @@ -0,0 +1,164 @@ +# Weather Data Pipeline Deployment Guide + +## Prerequisites +- Google Cloud SDK installed +- kubectl installed +- Docker installed + +## Setup + +1. List and set your GCP project: + ```bash + gcloud projects list + gcloud config set project YOUR_PROJECT_ID + export PROJECT_ID=$(gcloud config get-value project) + ``` + +2. Create a GKE cluster: + ```bash + gcloud container clusters create weather-cluster --num-nodes=2 --zone=us-central1-a + ``` + +3. Get credentials for the cluster: + ```bash + gcloud container clusters get-credentials weather-cluster --zone=us-central1-a + ``` + +4. Create Kubernetes secret for database credentials: + ```bash + kubectl create secret generic db-credentials \ + --from-literal=DB_NAME=your_db_name \ + --from-literal=DB_USER=your_db_user \ + --from-literal=DB_PASSWORD=your_db_password \ + --from-literal=DB_HOST=postgres \ + --from-literal=DB_PORT=5432 + ``` + +## Build and Push Docker Images + +Navigate to the project root directory: + +```bash +cd ../gcp-deployment/k8s-artifacts-v2 +``` + +Build and push data pipeline images: + +```bash +# Navigate to the data-pipeline directory +cd data-pipeline + +# Build and push data pipeline images +docker build --target extract -t gcr.io/${PROJECT_ID}/data-pipeline-extract:latest . +docker build --target load -t gcr.io/${PROJECT_ID}/data-pipeline-load:latest . +docker build --target transform -t gcr.io/${PROJECT_ID}/data-pipeline-transform:latest . + +docker push gcr.io/${PROJECT_ID}/data-pipeline-extract:latest +docker push gcr.io/${PROJECT_ID}/data-pipeline-load:latest +docker push gcr.io/${PROJECT_ID}/data-pipeline-transform:latest + +# Navigate to the flask-app directory +cd ../flask-app + +# Build and push Flask app image +docker build -t gcr.io/${PROJECT_ID}/flask-app:latest . 
+docker push gcr.io/${PROJECT_ID}/flask-app:latest +``` + +## Deploy to Kubernetes + +Deploy PostgreSQL: + +```bash +envsubst < postgres-deployment.yaml | kubectl apply -f - +envsubst < postgres-service.yaml | kubectl apply -f - + +# Wait for PostgreSQL to be ready +kubectl wait --for=condition=ready pod -l app=postgres --timeout=300s +``` + +Deploy data pipeline job: + +```bash +envsubst < data-pipeline-job.yaml | kubectl apply -f - +kubectl create job --from=cronjob/data-pipeline-sequence data-pipeline-manual-trigger-new + +# Wait for data pipeline job to complete +kubectl wait --for=condition=complete job/data-pipeline-manual-trigger-new --timeout=600s +``` + +Deploy Flask app: + +```bash +envsubst < flask-app-deployment.yaml | kubectl apply -f - +envsubst < flask-app-service.yaml | kubectl apply -f - +``` + +## Useful Commands for Monitoring and Debugging + +1. List all pods: + ```bash + kubectl get pods + ``` + +2. List all jobs: + ```bash + kubectl get jobs + ``` + +3. Delete a specific job: + ```bash + kubectl delete job data-pipeline-manual-trigger-new + ``` + +4. Delete a cronjob: + ```bash + kubectl delete cronjob data-pipeline-sequence + ``` + +5. View logs for a specific container in a pod: + ```bash + kubectl logs <pod-name> -c <container-name> + ``` + +6. List init container names for a pod: + ```bash + kubectl get pod <pod-name> -o jsonpath='{.spec.initContainers[*].name}' + ``` + +7. View logs continuously: + ```bash + kubectl logs -f <pod-name> -c <container-name> + ``` + +8. View logs for all containers in a pod: + ```bash + kubectl logs <pod-name> --all-containers=true + ``` + +9. Describe a pod (lists all containers and their statuses): + ```bash + kubectl describe pod <pod-name> + ``` + +10. Test environment variable substitution: + ```bash + envsubst < data-pipeline-job.yaml + ``` + +11. Port forward to access services locally: + ```bash + kubectl port-forward service/flask-app 8080:80 + ``` + +12. Scale a deployment: + ```bash + kubectl scale deployment flask-app --replicas=3 + ``` + +13. 
View cluster events: + ```bash + kubectl get events --sort-by=.metadata.creationTimestamp + ``` + +Remember to replace placeholders like `<pod-name>` and `<container-name>` with actual values from your deployment.