Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
First version of advanced deployment
Showing
6 changed files
with
148 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,10 @@ | ||
# ignore the data files | ||
# ignore the data files from 01-basic-deployment | ||
01-basic-deployment/scripts/data/combined_data_cleaned.csv | ||
01-basic-deployment/scripts/data/combined_data.csv | ||
|
||
# ignore the done files | ||
# ignore the done files from 01-basic-deployment | ||
01-basic-deployment/scripts/extract_done | ||
01-basic-deployment/scripts/transform_done | ||
01-basic-deployment/scripts/transform_done | ||
|
||
# ignore the data files from 02-advanced-deployment | ||
02-advanced-deployment/docker/data/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Send the script's print() progress straight to the container log instead of
# letting Python buffer it until the process exits
ENV PYTHONUNBUFFERED=1

# Set the working directory
WORKDIR /usr/src/app

# Install required libraries first so this layer is reused until
# requirements.txt changes
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /usr/src/app
COPY . .

# Directory (relative to WORKDIR) intended for the downloaded data files
ENV OUTPUT_DIR=data

# NOTE(review): EXPOSE is metadata only, and nothing visible in this image
# listens on port 80 -- confirm whether this line is still needed.
EXPOSE 80

# Run script when the container launches
CMD ["python", "./extract_and_transform.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
version: '3'

services:
  # PostgreSQL instance that receives the loaded weather rows.
  postgres:
    image: postgres:13
    environment:
      # Placeholder credentials; keep them in sync with the DB_* values of
      # the app service below.
      POSTGRES_USER: your_user
      POSTGRES_PASSWORD: your_password
      POSTGRES_DB: weather_data
    ports:
      - "5432:5432"
    volumes:
      # Named volume so the database files outlive the container.
      - pgdata:/var/lib/postgresql/data

  # Job image built from the Dockerfile in this directory.
  app:
    build: .
    # NOTE(review): this form of depends_on only orders container start-up;
    # it does not wait until PostgreSQL accepts connections.
    depends_on:
      - postgres
    environment:
      DB_NAME: weather_data
      DB_USER: your_user
      DB_PASSWORD: your_password
      DB_HOST: postgres
      DB_PORT: "5432"
    volumes:
      # Bind-mount the project directory over the image's working directory.
      - .:/usr/src/app
    command: ["python", "extract_and_transform.py"]

volumes:
  pgdata:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import boto3 | ||
from botocore.config import Config | ||
from botocore import UNSIGNED | ||
import os | ||
import pandas as pd | ||
import glob | ||
from sqlalchemy import create_engine, text | ||
|
||
def extract_noaa_gsod_data(year, month, output_dir='data'):
    """Download NOAA GSOD objects from the public S3 bucket into ``output_dir``.

    Objects are selected from the ``noaa-gsod-pds`` bucket by the key prefix
    ``'<year>/<month zero-padded to two digits>'`` using unsigned (anonymous)
    requests. Every page of the listing is consumed, so listings longer than a
    single ``list_objects_v2`` response are downloaded completely.

    Args:
        year: First component of the key prefix.
        month: Second component of the key prefix; zero-padded to two digits.
        output_dir: Local directory for the downloads; created if missing.

    Returns:
        None. Progress is reported with ``print``.
    """
    # NOTE(review): the prefix is a plain string match on object keys. If the
    # bucket is laid out as '<year>/<station>.csv', this selects stations whose
    # id starts with these two digits rather than a calendar month -- confirm.
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    bucket_name = 'noaa-gsod-pds'
    prefix = f'{year}/{str(month).zfill(2)}'

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # A single list_objects_v2 call is capped per response; the paginator
    # follows continuation tokens until the listing is exhausted.
    paginator = s3.get_paginator('list_objects_v2')
    found_any = False
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        for obj in page.get('Contents', []):
            key = obj['Key']
            filename = os.path.basename(key)
            if not filename:
                # Keys ending in '/' have no file name to write to.
                continue
            found_any = True
            local_path = os.path.join(output_dir, filename)

            # Download the file
            s3.download_file(bucket_name, key, local_path)
            print(f'Downloaded {key} to {local_path}')

    if not found_any:
        print(f"No files found for {prefix}")
|
||
def transform_and_load_to_postgres(input_dir='data', db_name='weather_data'):
    """Append every CSV file in ``input_dir`` to the PostgreSQL ``weather`` table.

    Connection settings are read from the environment variables ``DB_USER``,
    ``DB_PASSWORD``, ``DB_HOST`` and ``DB_PORT`` (with the defaults shown
    below). The ``weather`` table is created if it does not exist, then each
    ``*.csv`` file is read with pandas and appended to it.

    Args:
        input_dir: Directory that is searched (non-recursively) for ``*.csv``.
        db_name: Name of the database to connect to.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ValueError: If ``DB_PORT`` is not an integer.
    """
    # Function-scope import keeps this block self-contained; sqlalchemy is
    # already a dependency of this module.
    from sqlalchemy.engine import URL

    db_user = os.getenv('DB_USER', 'your_user')
    db_password = os.getenv('DB_PASSWORD', 'your_password')
    db_host = os.getenv('DB_HOST', 'postgres')  # Ensure this is 'postgres' in Docker
    db_port = os.getenv('DB_PORT', '5432')

    # URL.create escapes each component, so credentials containing characters
    # such as '@', ':' or '/' cannot corrupt the connection string.
    url = URL.create(
        'postgresql+psycopg2',
        username=db_user,
        password=db_password,
        host=db_host,
        port=int(db_port),
        database=db_name,
    )
    engine = create_engine(url)

    try:
        # engine.begin() commits on success. A plain connect() + execute()
        # under SQLAlchemy 2.x is rolled back when the connection closes, so
        # the CREATE TABLE would never take effect.
        with engine.begin() as conn:
            conn.execute(text('''
                CREATE TABLE IF NOT EXISTS weather (
                    station TEXT,
                    date TEXT,
                    latitude REAL,
                    longitude REAL,
                    elevation REAL,
                    name TEXT,
                    temp REAL,
                    temp_attributes TEXT,
                    dewp REAL,
                    dewp_attributes TEXT,
                    slp REAL,
                    slp_attributes TEXT,
                    stp REAL,
                    stp_attributes TEXT,
                    visib REAL,
                    visib_attributes TEXT,
                    wdsp REAL,
                    wdsp_attributes TEXT,
                    mxspd REAL,
                    gust REAL,
                    max REAL,
                    max_attributes TEXT,
                    min REAL,
                    min_attributes TEXT,
                    prcp REAL,
                    prcp_attributes TEXT,
                    sndp REAL,
                    frshtt TEXT
                )
            '''))

        # Process each CSV file in the input directory
        for file_path in glob.glob(f'{input_dir}/*.csv'):
            print(f'Processing {file_path}')
            df = pd.read_csv(file_path)

            # The table above declares lower-case column names. Headers that
            # contain upper-case characters are treated as case-sensitive
            # identifiers on insert, so normalize them to match the schema.
            # NOTE(review): a 'weather' table created by an earlier run with
            # different column casing must be dropped or migrated first.
            df.columns = [str(col).strip().lower() for col in df.columns]

            # Insert data into PostgreSQL table
            df.to_sql('weather', engine, if_exists='append', index=False)
            print(f'Loaded {file_path} into database')
    finally:
        # Release pooled connections even when a file fails to load.
        engine.dispose()
|
||
if __name__ == '__main__':
    # The container configuration passes OUTPUT_DIR and DB_NAME through the
    # environment; honor them here. The fallbacks equal the functions' own
    # defaults, so running without these variables behaves as before.
    data_dir = os.getenv('OUTPUT_DIR', 'data')
    extract_noaa_gsod_data(2020, 1, output_dir=data_dir)
    transform_and_load_to_postgres(
        input_dir=data_dir,
        db_name=os.getenv('DB_NAME', 'weather_data'),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
boto3==1.26.0 | ||
botocore==1.29.0 | ||
pandas==2.0.0 | ||
numpy==1.25.0 | ||
sqlalchemy==2.0.0 | ||
psycopg2-binary==2.9.6 |