Skip to content
This repository was archived by the owner on Dec 12, 2025. It is now read-only.

Commit

Permalink
Cleanup analysis for speed and presentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Timothy Middelkoop authored and Timothy Middelkoop committed Nov 10, 2021
1 parent 9073f0d commit 1b8618b
Show file tree
Hide file tree
Showing 2 changed files with 250 additions and 28 deletions.
268 changes: 245 additions & 23 deletions content/GCP/06_running_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"source": [
"## Connect to the VM\n",
"\n",
"First login to the instance from the Cloud Shell\n",
"First login to the instance from the Cloud Shell by running the following command:\n",
"```\n",
"gcloud compute ssh instance-1\n",
"```\n",
Expand Down Expand Up @@ -106,6 +106,16 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "96db6a66-3fbf-419a-b8c8-dbb27639e990",
"metadata": {},
"outputs": [],
"source": [
"cd ~"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "36554c99-ba08-4733-8ef2-e68d42d0d2b7",
"metadata": {},
"outputs": [
Expand All @@ -114,32 +124,31 @@
"output_type": "stream",
"text": [
"Cloning into 'CLASS-Examples'...\n",
"remote: Enumerating objects: 16, done.\u001b[K\n",
"remote: Counting objects: 100% (16/16), done.\u001b[K\n",
"remote: Compressing objects: 100% (13/13), done.\u001b[K\n",
"remote: Total 41 (delta 4), reused 15 (delta 3), pack-reused 25\u001b[K\n",
"Unpacking objects: 100% (41/41), done.\n"
"remote: Enumerating objects: 23, done.\u001b[K\n",
"remote: Counting objects: 100% (23/23), done.\u001b[K\n",
"remote: Compressing objects: 100% (18/18), done.\u001b[K\n",
"remote: Total 48 (delta 8), reused 20 (delta 5), pack-reused 25\u001b[K\n",
"Unpacking objects: 100% (48/48), done.\n"
]
}
],
"source": [
"cd ~\n",
"git clone https://github.internet2.edu/CLASS/CLASS-Examples.git"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "90c1cda7-60d4-44bb-84f8-e776a77a94ab",
"metadata": {},
"outputs": [],
"source": [
"cd CLASS-Examples/landsat/"
"cd ~/CLASS-Examples/landsat/"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "55b628d5-6e5c-45a5-9cd3-c129db9cdcd2",
"metadata": {},
"outputs": [
Expand All @@ -148,12 +157,12 @@
"output_type": "stream",
"text": [
"total 24\n",
"-rw-r--r-- 1 learner learner 841 Nov 10 20:50 ReadMe.md\n",
"-rw-r--r-- 1 learner learner 72 Nov 10 20:50 clean.sh\n",
"-rw-r--r-- 1 learner learner 256 Nov 10 20:50 download.sh\n",
"-rw-r--r-- 1 learner learner 314 Nov 10 20:50 get-index.sh\n",
"-rw-r--r-- 1 learner learner 110 Nov 10 20:50 search.json\n",
"-rw-r--r-- 1 learner learner 1447 Nov 10 20:50 search.py\n"
"-rw-r--r-- 1 learner learner 862 Nov 10 22:31 ReadMe.md\n",
"-rw-r--r-- 1 learner learner 72 Nov 10 22:31 clean.sh\n",
"-rw-r--r-- 1 learner learner 280 Nov 10 22:31 download.sh\n",
"-rw-r--r-- 1 learner learner 314 Nov 10 22:31 get-index.sh\n",
"-rw-r--r-- 1 learner learner 76 Nov 10 22:31 search.json\n",
"-rw-r--r-- 1 learner learner 783 Nov 10 22:31 search.py\n"
]
}
],
Expand All @@ -173,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "e56ab74a-ae6d-4602-a26b-4a2656bd40cd",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -210,7 +219,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "bbe85b75-c7cd-40ed-a3b0-37cbd0a5f52e",
"metadata": {},
"outputs": [
Expand All @@ -228,7 +237,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "18a9b71c-5871-4ce2-a202-b48ad04e8d38",
"metadata": {},
"outputs": [
Expand All @@ -237,7 +246,7 @@
"output_type": "stream",
"text": [
"Copying gs://gcp-public-data-landsat/index.csv.gz...\n",
"/ [1 files][757.2 MiB/757.2 MiB] 59.4 MiB/s \n",
"- [1 files][757.2 MiB/757.2 MiB] 54.0 MiB/s \n",
"Operation completed over 1 objects/757.2 MiB. \n"
]
}
Expand All @@ -248,7 +257,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "2cdaf24c-c4aa-4e80-9236-939e7c982916",
"metadata": {},
"outputs": [],
Expand All @@ -258,7 +267,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "b005876c-f9af-43d6-80c6-f04295413b9b",
"metadata": {},
"outputs": [
Expand All @@ -267,13 +276,226 @@
"output_type": "stream",
"text": [
"total 2.6G\n",
"-rw-r--r-- 1 learner learner 2.6G Nov 10 20:50 index.csv\n"
"-rw-r--r-- 1 learner learner 2.6G Nov 10 22:32 index.csv\n"
]
}
],
"source": [
"ls -lh data"
]
},
{
"cell_type": "markdown",
"id": "fcde8334-f58d-4c3d-995a-2491be0f95ea",
"metadata": {},
"source": [
"We will now explore the data."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ffe969db-d207-44fe-8957-8d129c76ee8f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SCENE_ID,PRODUCT_ID,SPACECRAFT_ID,SENSOR_ID,DATE_ACQUIRED,COLLECTION_NUMBER,COLLECTION_CATEGORY,SENSING_TIME,DATA_TYPE,WRS_PATH,WRS_ROW,CLOUD_COVER,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,TOTAL_SIZE,BASE_URL\n",
"LM41170311983272FFF03,LM04_L1TP_117031_19830929_20180412_01_T2,LANDSAT_4,MSS,1983-09-29,01,T2,1983-09-29T01:45:39.0520000Z,L1TP,117,31,2.0,42.79515,40.7823,124.88634,127.85668,27769529,gs://gcp-public-data-landsat/LM04/01/117/031/LM04_L1TP_117031_19830929_20180412_01_T2\n",
"LM10890151972214AAA05,LM01_L1GS_089015_19720801_20180428_01_T2,LANDSAT_1,MSS,1972-08-01,01,T2,1972-08-01T22:10:17.7940000Z,L1GS,89,15,0.0,65.211,62.9963,-170.33714,-165.11701,16228538,gs://gcp-public-data-landsat/LM01/01/089/015/LM01_L1GS_089015_19720801_20180428_01_T2\n",
"LC80660912015026LGN02,LC08_L1GT_066091_20150126_20180202_01_T2,LANDSAT_8,OLI_TIRS,2015-01-26,01,T2,2015-01-26T21:24:43.3704780Z,L1GT,66,91,94.98,-43.51716,-45.68406,-177.72298,-174.66884,1075234161,gs://gcp-public-data-landsat/LC08/01/066/091/LC08_L1GT_066091_20150126_20180202_01_T2\n"
]
}
],
"source": [
"head --lines=4 data/index.csv"
]
},
{
"cell_type": "markdown",
"id": "532e6da3-302a-4e8a-8570-752995f30f1d",
"metadata": {},
"source": [
"## Search for Data\n",
"\n",
"We can see the data is well formed and what we expect. We will now use this index to download data for a specific point captured by Landsat 8. The following script does a simple filter."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c5e300c3-e1f3-4cd4-9679-77725e61c4db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#!/usr/bin/python3\n",
"import json\n",
"import csv\n",
"import sys\n",
"\n",
"# Example: Burr Oak Tree\n",
"# 38.899313,-92.464562 (Lat north+, Long west-) ; Landsat Path 025, Row 033\n",
"config=json.load(open(\"search.json\"))\n",
"lat,lon=config['lat'],config['lon']\n",
"landsat=config['landsat']\n",
"\n",
"reader=csv.reader(sys.stdin)\n",
"header=next(reader) # skip header\n",
"for l in reader:\n",
" SCENE_ID,PRODUCT_ID,SPACECRAFT_ID,SENSOR_ID,DATE_ACQUIRED,COLLECTION_NUMBER,COLLECTION_CATEGORY,SENSING_TIME,DATA_TYPE,WRS_PATH,WRS_ROW,CLOUD_COVER,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,TOTAL_SIZE,BASE_URL=l\n",
" west,east=float(WEST_LON),float(EAST_LON)\n",
" north,south=float(NORTH_LAT),float(SOUTH_LAT)\n",
" if SPACECRAFT_ID==landsat and north >= lat and south <= lat and west <= lon and east >= lon:\n",
" print(BASE_URL) # output BASE_URL\n"
]
}
],
"source": [
"cat search.py"
]
},
{
"cell_type": "markdown",
"id": "4aa3de47-3dd4-4a0f-9f07-f2f004de7054",
"metadata": {},
"source": [
"We can see that the actual search data comes from the file `search.json`. The program reads the data from standard input and iterates over all rows in the CSV file. It filters for the rows whose image contains the point and prints out the bucket URL for each of them. We are interested in all products that contain the Burr Oak Tree."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c9872510-4265-4b0e-aeb5-5a829ff69b24",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"lat\": 38.899313,\n",
" \"lon\": -92.464562,\n",
" \"landsat\": \"LANDSAT_8\"\n",
"}\n"
]
}
],
"source": [
"cat search.json"
]
},
{
"cell_type": "markdown",
"id": "cbb27235-6bc4-4eb6-b668-5c30427a28b8",
"metadata": {},
"source": [
"Now let's test this on a subset of the data."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6912a9ec-0f9b-4500-ba20-d4280592b323",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n",
"gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n"
]
}
],
"source": [
"head --lines=100000 data/index.csv | python3 search.py"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3572c518-df83-4906-bfa6-a37bde2a5063",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#!/bin/bash\n",
"\n",
"# Read space separated URL from STDIN and download \n",
"while read -r URL ; do\n",
" echo \"+++ $URL\"\n",
" # -m parallel\n",
" # -n no-clobber (do not re-download data)\n",
" # -r recursive (download all the data in the specified URL)\n",
" gsutil -m cp -n -r \"${URL}/\" data/\n",
"done\n"
]
}
],
"source": [
"cat download.sh"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "cccec3e1-0dcd-4e3b-a059-a884f5219b66",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_ANG.txt...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B1.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B11.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B10.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B2.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B8.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B9.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_BQA.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B3.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B4.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B6.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B5.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_MTL.txt...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B7.TIF...\n",
"- [14/14 files][ 1021 MiB/ 1021 MiB] 100% Done \n",
"Operation completed over 14 objects/1021.8 MiB. \n",
"+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_ANG.txt...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B10.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B1.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B2.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B11.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B8.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B4.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_MTL.txt...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B9.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B3.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B7.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B6.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_BQA.TIF...\n",
"Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B5.TIF...\n",
"- [14/14 files][ 1.0 GiB/ 1.0 GiB] 100% Done \n",
"Operation completed over 14 objects/1.0 GiB. \n"
]
}
],
"source": [
"head --lines=100000 data/index.csv | python3 search.py | bash download.sh"
]
}
],
"metadata": {
Expand Down
10 changes: 5 additions & 5 deletions scripts/gcp-create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ NAME=learner
VM=essentials
ZONE=us-west2-c
PROJECT=CLASS-Essentials
REPO="git@github.internet2.edu:CLASS/${PROJECT}.git"
GITHUB=github.internet2.edu
REPO="git@${GITHUB}:CLASS/${PROJECT}.git"

echo "=== gcp-dev.sh $PROJECT $BRANCH"

Expand All @@ -25,14 +26,13 @@ echo "+++ configuring VM"

gcloud compute ssh --zone=$ZONE $NAME@$VM --ssh-flag='-A' <<EOF
sudo apt install --yes git python3-pip python3-venv bash-completion
ssh-keyscan github.internet2.edu > .ssh/known_hosts
ssh-keyscan ${GITHUB} > .ssh/known_hosts
git config --global color.ui auto
git config --global push.default simple
git config --global pull.ff only
git config --global user.name "$(git config user.name)"
git config --global user.email "$(git config user.name)"
git clone --branch $BRANCH $REPO
cd $PROJECT
git config user.name "$(git config user.name)"
git config user.email "$(git config user.name)"
EOF

echo "+++ configure local ssh"
Expand Down

0 comments on commit 1b8618b

Please sign in to comment.