First version of advanced deployment
tmanik committed Aug 7, 2024
1 parent 015f41b commit 13639cb
Showing 6 changed files with 148 additions and 3 deletions.
Binary file modified .DS_Store
Binary file not shown.
9 changes: 6 additions & 3 deletions .gitignore
@@ -1,7 +1,10 @@
-# ignore the data files
+# ignore the data files from 01-basic-deployment
 01-basic-deployment/scripts/data/combined_data_cleaned.csv
 01-basic-deployment/scripts/data/combined_data.csv
 
-# ignore the done files
+# ignore the done files from 01-basic-deployment
 01-basic-deployment/scripts/extract_done
-01-basic-deployment/scripts/transform_done
+01-basic-deployment/scripts/transform_done
+
+# ignore the data files from 02-advanced-deployment
+02-advanced-deployment/docker/data/*
21 changes: 21 additions & 0 deletions 02-advanced-deployment/docker/Dockerfile
@@ -0,0 +1,21 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory
WORKDIR /usr/src/app

# Install required libraries
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /usr/src/app
COPY . .

# Define environment variable
ENV OUTPUT_DIR=data

# Make port 80 available to the world outside this container
EXPOSE 80

# Run script when the container launches
CMD ["python", "./extract_and_transform.py"]
28 changes: 28 additions & 0 deletions 02-advanced-deployment/docker/docker-compose.yml
@@ -0,0 +1,28 @@
version: '3'
services:
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: your_user
      POSTGRES_PASSWORD: your_password
      POSTGRES_DB: weather_data
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
  app:
    build: .
    depends_on:
      - postgres
    environment:
      - DB_NAME=weather_data
      - DB_USER=your_user
      - DB_PASSWORD=your_password
      - DB_HOST=postgres
      - DB_PORT=5432
    volumes:
      - .:/usr/src/app
    command: python extract_and_transform.py

volumes:
  pgdata:
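In this compose file, depends_on only orders container start-up; it does not wait for Postgres to accept connections, so the app can race the database on its first boot. A minimal sketch of a retry loop the script could run before loading data (wait_for_postgres is a hypothetical helper, assuming the same DB_* variables as above; it is not part of this commit):

import os
import time
from sqlalchemy import create_engine, text

def wait_for_postgres(retries=10, delay=3.0):
    # Build the connection URL from the environment passed in by docker-compose
    url = (
        f"postgresql+psycopg2://{os.getenv('DB_USER', 'your_user')}:"
        f"{os.getenv('DB_PASSWORD', 'your_password')}@"
        f"{os.getenv('DB_HOST', 'postgres')}:{os.getenv('DB_PORT', '5432')}/"
        f"{os.getenv('DB_NAME', 'weather_data')}"
    )
    engine = create_engine(url)
    for _ in range(retries):
        try:
            with engine.connect() as conn:
                conn.execute(text('SELECT 1'))  # cheap liveness probe
            return
        except Exception:
            time.sleep(delay)
    raise RuntimeError('Postgres did not become available in time')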
87 changes: 87 additions & 0 deletions 02-advanced-deployment/docker/extract_and_transform.py
@@ -0,0 +1,87 @@
import boto3
from botocore.config import Config
from botocore import UNSIGNED
import os
import pandas as pd
import glob
from sqlalchemy import create_engine, text

def extract_noaa_gsod_data(year, month, output_dir='data'):
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    bucket_name = 'noaa-gsod-pds'
    prefix = f'{year}/{str(month).zfill(2)}'

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # List objects in the bucket for the specified prefix
    # (list_objects_v2 returns at most 1,000 keys per call)
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)

    if 'Contents' not in response:
        print(f"No files found for {prefix}")
        return

    for obj in response['Contents']:
        key = obj['Key']
        local_path = os.path.join(output_dir, os.path.basename(key))

        # Download the file
        s3.download_file(bucket_name, key, local_path)
        print(f'Downloaded {key} to {local_path}')

def transform_and_load_to_postgres(input_dir='data', db_name='weather_data'):
    db_user = os.getenv('DB_USER', 'your_user')
    db_password = os.getenv('DB_PASSWORD', 'your_password')
    db_host = os.getenv('DB_HOST', 'postgres')  # Ensure this is 'postgres' in Docker
    db_port = os.getenv('DB_PORT', '5432')

    # Create SQLAlchemy engine
    engine = create_engine(f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

    # Ensure the 'weather' table exists
    # (use engine.begin() so the DDL is committed; SQLAlchemy 2.x connections do not autocommit)
    with engine.begin() as conn:
        conn.execute(text('''
            CREATE TABLE IF NOT EXISTS weather (
                station TEXT,
                date TEXT,
                latitude REAL,
                longitude REAL,
                elevation REAL,
                name TEXT,
                temp REAL,
                temp_attributes TEXT,
                dewp REAL,
                dewp_attributes TEXT,
                slp REAL,
                slp_attributes TEXT,
                stp REAL,
                stp_attributes TEXT,
                visib REAL,
                visib_attributes TEXT,
                wdsp REAL,
                wdsp_attributes TEXT,
                mxspd REAL,
                gust REAL,
                max REAL,
                max_attributes TEXT,
                min REAL,
                min_attributes TEXT,
                prcp REAL,
                prcp_attributes TEXT,
                sndp REAL,
                frshtt TEXT
            )
        '''))

    # Process each CSV file in the input directory
    for file_path in glob.glob(f'{input_dir}/*.csv'):
        print(f'Processing {file_path}')
        df = pd.read_csv(file_path)

        # Normalize column names to lower case so they match the table schema above
        # (GSOD CSV headers are upper case)
        df.columns = df.columns.str.lower()

        # Insert data into PostgreSQL table
        df.to_sql('weather', engine, if_exists='append', index=False)
        print(f'Loaded {file_path} into database')

if __name__ == '__main__':
    extract_noaa_gsod_data(2020, 1)
    transform_and_load_to_postgres()
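As a quick sanity check after a run, the load could be verified by reading a few rows back with the same connection settings (a sketch under the defaults above, not part of this commit):

import os
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine(
    f"postgresql+psycopg2://{os.getenv('DB_USER', 'your_user')}:"
    f"{os.getenv('DB_PASSWORD', 'your_password')}@"
    f"{os.getenv('DB_HOST', 'postgres')}:{os.getenv('DB_PORT', '5432')}/weather_data"
)

# Row count and a small preview of the loaded table
print(pd.read_sql('SELECT COUNT(*) AS row_count FROM weather', engine))
print(pd.read_sql('SELECT station, date, temp FROM weather LIMIT 5', engine))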
6 changes: 6 additions & 0 deletions 02-advanced-deployment/docker/requirements.txt
@@ -0,0 +1,6 @@
boto3==1.26.0
botocore==1.29.0
pandas==2.0.0
numpy==1.25.0
sqlalchemy==2.0.0
psycopg2-binary==2.9.6
