-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
01-basic-deployment is an ETL pipeline. 02-advanced-deployment is an ELT pipeline. Its purpose is to handle frequently updated data sources.
- Loading branch information
Showing
15 changed files
with
318 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| DB_NAME=weather_data | ||
| DB_USER=your_user | ||
| DB_PASSWORD=your_password | ||
| DB_HOST=postgres | ||
| DB_PORT=5432 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # Base image | ||
| FROM python:3.9-slim AS base | ||
| WORKDIR /usr/src/app | ||
| COPY requirements.txt ./ | ||
| RUN pip install --no-cache-dir -r requirements.txt | ||
|
|
||
| # Extract stage | ||
| FROM base AS extract | ||
| WORKDIR /usr/src/app # Ensure the working directory is set | ||
| COPY scripts/extract.py ./ | ||
| COPY entrypoints/extract.sh ./ | ||
| RUN chmod +x extract.sh | ||
| ENTRYPOINT ["sh", "extract.sh"] | ||
|
|
||
| # Load stage | ||
| FROM base AS load | ||
| WORKDIR /usr/src/app # Ensure the working directory is set | ||
| COPY scripts/load.py ./ | ||
| COPY entrypoints/load.sh ./ | ||
| RUN chmod +x load.sh | ||
| ENTRYPOINT ["sh", "load.sh"] | ||
|
|
||
| # Transform stage | ||
| FROM base AS transform | ||
| WORKDIR /usr/src/app # Ensure the working directory is set | ||
| COPY scripts/transform.py ./ | ||
| COPY entrypoints/transform.sh ./ | ||
| RUN chmod +x transform.sh | ||
| ENTRYPOINT ["sh", "transform.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| version: '3' | ||
| services: | ||
| postgres: | ||
| image: postgres:13 | ||
| environment: | ||
| POSTGRES_USER: ${DB_USER} | ||
| POSTGRES_PASSWORD: ${DB_PASSWORD} | ||
| POSTGRES_DB: ${DB_NAME} | ||
| ports: | ||
| - "5432:5432" | ||
| volumes: | ||
| - pgdata:/var/lib/postgresql/data | ||
|
|
||
| extract: | ||
| build: | ||
| context: . | ||
| target: extract | ||
| environment: | ||
| - DB_NAME=${DB_NAME} | ||
| - DB_USER=${DB_USER} | ||
| - DB_PASSWORD=${DB_PASSWORD} | ||
| - DB_HOST=${DB_HOST} | ||
| - DB_PORT=${DB_PORT} | ||
| volumes: | ||
| - ./scripts:/usr/src/app | ||
| - shared-data:/data | ||
| depends_on: | ||
| - postgres | ||
|
|
||
| load: | ||
| build: | ||
| context: . | ||
| target: load | ||
| environment: | ||
| - DB_NAME=${DB_NAME} | ||
| - DB_USER=${DB_USER} | ||
| - DB_PASSWORD=${DB_PASSWORD} | ||
| - DB_HOST=${DB_HOST} | ||
| - DB_PORT=${DB_PORT} | ||
| volumes: | ||
| - ./scripts:/usr/src/app | ||
| - shared-data:/data | ||
| depends_on: | ||
| - extract | ||
|
|
||
| transform: | ||
| build: | ||
| context: . | ||
| target: transform | ||
| environment: | ||
| - DB_NAME=${DB_NAME} | ||
| - DB_USER=${DB_USER} | ||
| - DB_PASSWORD=${DB_PASSWORD} | ||
| - DB_HOST=${DB_HOST} | ||
| - DB_PORT=${DB_PORT} | ||
| volumes: | ||
| - ./scripts:/usr/src/app | ||
| - shared-data:/data | ||
| depends_on: | ||
| - load | ||
|
|
||
| volumes: | ||
| pgdata: | ||
| shared-data: |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| #!/bin/bash | ||
| set -e | ||
|
|
||
| # Ensure /data directory exists | ||
| mkdir -p /data | ||
|
|
||
| echo "Starting extract process..." | ||
| python /usr/src/app/extract.py | ||
|
|
||
| echo "Extract process completed." | ||
|
|
||
| # Signal successful completion | ||
| touch /data/extract_done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| #!/bin/bash | ||
| set -e | ||
|
|
||
| # Ensure /data directory exists | ||
| mkdir -p /data | ||
|
|
||
| # Wait for extract stage to complete | ||
| while [ ! -f /data/extract_done ]; do | ||
| echo "Waiting for extract stage to complete..." | ||
| sleep 5 | ||
| done | ||
|
|
||
| # Run the load script | ||
| python /usr/src/app/load.py | ||
|
|
||
| # Signal successful completion | ||
| touch /data/load_done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| #!/bin/bash | ||
| set -e | ||
|
|
||
| # Ensure /data directory exists | ||
| mkdir -p /data | ||
|
|
||
| # Wait for load stage to complete | ||
| while [ ! -f /data/load_done ]; do | ||
| echo "Waiting for load stage to complete..." | ||
| sleep 5 | ||
| done | ||
|
|
||
| # Run the transform script | ||
| python /usr/src/app/transform.py | ||
|
|
||
| # Signal successful completion | ||
| touch /data/transform_done |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| # extract.py | ||
|
|
||
| import boto3 | ||
| from botocore.config import Config | ||
| from botocore import UNSIGNED | ||
| import os | ||
|
|
||
| def extract_noaa_gsod_data(year, month, output_dir='/data'): | ||
| s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED)) | ||
| bucket_name = 'noaa-gsod-pds' | ||
| prefix = f'{year}/{str(month).zfill(2)}' | ||
|
|
||
| if not os.path.exists(output_dir): | ||
| os.makedirs(output_dir) | ||
|
|
||
| # List objects in the bucket for the specified month | ||
| response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix) | ||
|
|
||
| if 'Contents' not in response: | ||
| print(f"No files found for {prefix}") | ||
| return | ||
|
|
||
| for obj in response['Contents']: | ||
| key = obj['Key'] | ||
| local_path = os.path.join(output_dir, os.path.basename(key)) | ||
|
|
||
| # Download the file | ||
| s3.download_file(bucket_name, key, local_path) | ||
| print(f'Downloaded {key} to {local_path}') | ||
|
|
||
| if __name__ == '__main__': | ||
| extract_noaa_gsod_data(2020, 1) |
Oops, something went wrong.