Skip to content

Commit

Permalink
Containerized data processing workflow
Browse files Browse the repository at this point in the history
Containerized data processing workflow from github copilot workshop. Will need to restructure directory in future commits.
  • Loading branch information
tmanik committed Jul 30, 2024
1 parent ca2accc commit 0b1d9aa
Show file tree
Hide file tree
Showing 14 changed files with 908 additions and 1 deletion.
Binary file added .DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Pipeline data files kept out of version control
scripts/data/combined_data_cleaned.csv
scripts/data/combined_data.csv

# Stage-completion marker files created by the entrypoint scripts
scripts/extract_done
scripts/transform_done

# macOS Finder metadata (matches in every directory; copies that are already
# tracked must additionally be removed with `git rm --cached`)
.DS_Store
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image shared by the extract, transform and load pipeline stages.
# Use the official Python base image
FROM python:3.9

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file first so the dependency layers below stay cached
# until requirements.txt itself changes
COPY requirements.txt /app/

# Install the PostgreSQL client headers needed to build psycopg2.
# update + install run in a single layer so a cached, stale package index is
# never reused, and the index is deleted afterwards to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the Python packages
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . /app/

# Default command (can be overridden by docker-compose).
# The entrypoint scripts live in the repository's app/ directory, which ends up
# at /app/app/ inside the image, so the path is relative to WORKDIR /app.
CMD ["sh", "app/entrypoint_extract.sh"]
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,5 @@ Container-Orchestration-for-Research-Workflows/
│ │ │ │ ├── components/
│ │ │ │ │ ├── VisualizationComponent.js
│ │ │ │ ├── public/
│ │ │ │ │ ├── index.html
│ │ │ │ │ ├── index.html
```
10 changes: 10 additions & 0 deletions app/entrypoint_extract.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/sh
# Entrypoint for the extract stage: runs scripts/extract.py and then creates
# the extract_done marker file that the transform stage polls for.
# Uses only POSIX sh features, matching the other stage entrypoints.
set -e

MARKER_DIR=/app/scripts

# /app is bind-mounted from the host, so markers written by an earlier run are
# still present. Clear them first so the downstream stages wait for the data
# produced by THIS run instead of proceeding on stale markers.
rm -f "$MARKER_DIR/extract_done" "$MARKER_DIR/transform_done"

echo "Starting extract process..."
python /app/scripts/extract.py

echo "Extract process completed."

# Signal successful completion (not reached if extract.py fails, due to set -e)
touch "$MARKER_DIR/extract_done"
11 changes: 11 additions & 0 deletions app/entrypoint_load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh
# Entrypoint for the load stage: blocks until the transform stage has created
# its completion marker, then runs scripts/load.py.
set -e

TRANSFORM_MARKER=/app/scripts/transform_done

# Poll every 5 seconds until the marker file exists
until [ -f "$TRANSFORM_MARKER" ]; do
    echo "Waiting for transform stage to complete..."
    sleep 5
done

# Marker found: run the load script (its exit status becomes the script's)
python /app/scripts/load.py
14 changes: 14 additions & 0 deletions app/entrypoint_transform.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
# Entrypoint for the transform stage: waits for the extract stage's marker
# file, runs scripts/transform.py, then creates the transform_done marker that
# the load stage polls for.
set -e

EXTRACT_MARKER=/app/scripts/extract_done
TRANSFORM_MARKER=/app/scripts/transform_done

# /app is bind-mounted from the host, so a transform_done file from an earlier
# run may still exist. Remove it up front so the load stage cannot start before
# this run's transform has actually finished.
rm -f "$TRANSFORM_MARKER"

# Wait for extract stage to complete
while [ ! -f "$EXTRACT_MARKER" ]; do
    echo "Waiting for extract stage to complete..."
    sleep 5
done

# Run the transform script
python /app/scripts/transform.py

# Signal successful completion (not reached if transform.py fails, due to set -e)
touch "$TRANSFORM_MARKER"
41 changes: 41 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Pipeline: postgres (database) -> extract -> transform -> load.
# The three stage services are built from the same Dockerfile and differ only
# in their entrypoint script.
services:
  postgres:
    # Pinned to a major version so a new upstream release cannot silently
    # change the on-disk data format or the expected volume path.
    # NOTE(review): if the postgres_data volume was initialised by a different
    # major version, set this tag to match it.
    image: postgres:16
    container_name: postgres-container
    environment:
      # Export POSTGRES_PASSWORD to override; the default keeps existing setups working.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-mysecretpassword}"
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # Lets dependants wait until the server accepts connections, not merely
    # until the container process has started.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 10

  extract:
    build: .
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DB_URL: "postgresql://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/postgres"
    volumes:
      - .:/app
    entrypoint: ["/app/app/entrypoint_extract.sh"]

  transform:
    build: .
    # Start only after the extract container has exited with status 0.
    depends_on:
      extract:
        condition: service_completed_successfully
    volumes:
      - .:/app
    entrypoint: ["/app/app/entrypoint_transform.sh"]

  load:
    build: .
    # Start only after transform has exited with status 0 and the database is
    # accepting connections.
    depends_on:
      postgres:
        condition: service_healthy
      transform:
        condition: service_completed_successfully
    environment:
      DB_URL: "postgresql://postgres:${POSTGRES_PASSWORD:-mysecretpassword}@postgres:5432/postgres"
    volumes:
      - .:/app
    entrypoint: ["/app/app/entrypoint_load.sh"]

volumes:
  postgres_data: {}
18 changes: 18 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Python dependencies for the pipeline image (installed with pip in the Dockerfile).
# Every package is pinned to an exact version.

# --- Data handling ---
numpy==1.23.5
pandas==1.5.2

# --- Configuration from .env files ---
python-dotenv==0.20.0

# --- PostgreSQL driver and ORM ---
psycopg2==2.9.5
sqlalchemy==1.4.39

# --- AWS SDK ---
boto3==1.26.13

# --- Command-line and web frameworks ---
click==8.1.3
Flask==2.3.2
Binary file added scripts/.DS_Store
Binary file not shown.
Loading

0 comments on commit 0b1d9aa

Please sign in to comment.