From 1b8618b3ebd3e97f2c342655b1904ebd0cedb38b Mon Sep 17 00:00:00 2001 From: Timothy Middelkoop Date: Wed, 10 Nov 2021 22:34:58 +0000 Subject: [PATCH] Cleanup analysis for speed and presentation --- content/GCP/06_running_analysis.ipynb | 268 +++++++++++++++++++++++--- scripts/gcp-create.sh | 10 +- 2 files changed, 250 insertions(+), 28 deletions(-) diff --git a/content/GCP/06_running_analysis.ipynb b/content/GCP/06_running_analysis.ipynb index 56531e2..0ba3926 100644 --- a/content/GCP/06_running_analysis.ipynb +++ b/content/GCP/06_running_analysis.ipynb @@ -30,7 +30,7 @@ "source": [ "## Connect to the VM\n", "\n", - "First login to the instance from the Cloud Shell\n", + "First login to the instance from the Cloud Shell by running the following command:\n", "```\n", "gcloud compute ssh instance-1\n", "```\n", @@ -106,6 +106,16 @@ { "cell_type": "code", "execution_count": 2, + "id": "96db6a66-3fbf-419a-b8c8-dbb27639e990", + "metadata": {}, + "outputs": [], + "source": [ + "cd ~" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "36554c99-ba08-4733-8ef2-e68d42d0d2b7", "metadata": {}, "outputs": [ @@ -114,32 +124,31 @@ "output_type": "stream", "text": [ "Cloning into 'CLASS-Examples'...\n", - "remote: Enumerating objects: 16, done.\u001b[K\n", - "remote: Counting objects: 100% (16/16), done.\u001b[K\n", - "remote: Compressing objects: 100% (13/13), done.\u001b[K\n", - "remote: Total 41 (delta 4), reused 15 (delta 3), pack-reused 25\u001b[K\n", - "Unpacking objects: 100% (41/41), done.\n" + "remote: Enumerating objects: 23, done.\u001b[K\n", + "remote: Counting objects: 100% (23/23), done.\u001b[K\n", + "remote: Compressing objects: 100% (18/18), done.\u001b[K\n", + "remote: Total 48 (delta 8), reused 20 (delta 5), pack-reused 25\u001b[K\n", + "Unpacking objects: 100% (48/48), done.\n" ] } ], "source": [ - "cd ~\n", "git clone https://github.internet2.edu/CLASS/CLASS-Examples.git" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 
4, "id": "90c1cda7-60d4-44bb-84f8-e776a77a94ab", "metadata": {}, "outputs": [], "source": [ - "cd CLASS-Examples/landsat/" + "cd ~/CLASS-Examples/landsat/" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "55b628d5-6e5c-45a5-9cd3-c129db9cdcd2", "metadata": {}, "outputs": [ @@ -148,12 +157,12 @@ "output_type": "stream", "text": [ "total 24\n", - "-rw-r--r-- 1 learner learner 841 Nov 10 20:50 ReadMe.md\n", - "-rw-r--r-- 1 learner learner 72 Nov 10 20:50 clean.sh\n", - "-rw-r--r-- 1 learner learner 256 Nov 10 20:50 download.sh\n", - "-rw-r--r-- 1 learner learner 314 Nov 10 20:50 get-index.sh\n", - "-rw-r--r-- 1 learner learner 110 Nov 10 20:50 search.json\n", - "-rw-r--r-- 1 learner learner 1447 Nov 10 20:50 search.py\n" + "-rw-r--r-- 1 learner learner 862 Nov 10 22:31 ReadMe.md\n", + "-rw-r--r-- 1 learner learner 72 Nov 10 22:31 clean.sh\n", + "-rw-r--r-- 1 learner learner 280 Nov 10 22:31 download.sh\n", + "-rw-r--r-- 1 learner learner 314 Nov 10 22:31 get-index.sh\n", + "-rw-r--r-- 1 learner learner 76 Nov 10 22:31 search.json\n", + "-rw-r--r-- 1 learner learner 783 Nov 10 22:31 search.py\n" ] } ], @@ -173,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "e56ab74a-ae6d-4602-a26b-4a2656bd40cd", "metadata": {}, "outputs": [ @@ -210,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "bbe85b75-c7cd-40ed-a3b0-37cbd0a5f52e", "metadata": {}, "outputs": [ @@ -228,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "18a9b71c-5871-4ce2-a202-b48ad04e8d38", "metadata": {}, "outputs": [ @@ -237,7 +246,7 @@ "output_type": "stream", "text": [ "Copying gs://gcp-public-data-landsat/index.csv.gz...\n", - "/ [1 files][757.2 MiB/757.2 MiB] 59.4 MiB/s \n", + "- [1 files][757.2 MiB/757.2 MiB] 54.0 MiB/s \n", "Operation completed over 1 objects/757.2 MiB. 
\n" ] } @@ -248,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "2cdaf24c-c4aa-4e80-9236-939e7c982916", "metadata": {}, "outputs": [], @@ -258,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "b005876c-f9af-43d6-80c6-f04295413b9b", "metadata": {}, "outputs": [ @@ -267,13 +276,226 @@ "output_type": "stream", "text": [ "total 2.6G\n", - "-rw-r--r-- 1 learner learner 2.6G Nov 10 20:50 index.csv\n" + "-rw-r--r-- 1 learner learner 2.6G Nov 10 22:32 index.csv\n" ] } ], "source": [ "ls -lh data" ] + }, + { + "cell_type": "markdown", + "id": "fcde8334-f58d-4c3d-995a-2491be0f95ea", + "metadata": {}, + "source": [ + "We will now explore the data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ffe969db-d207-44fe-8957-8d129c76ee8f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SCENE_ID,PRODUCT_ID,SPACECRAFT_ID,SENSOR_ID,DATE_ACQUIRED,COLLECTION_NUMBER,COLLECTION_CATEGORY,SENSING_TIME,DATA_TYPE,WRS_PATH,WRS_ROW,CLOUD_COVER,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,TOTAL_SIZE,BASE_URL\n", + "LM41170311983272FFF03,LM04_L1TP_117031_19830929_20180412_01_T2,LANDSAT_4,MSS,1983-09-29,01,T2,1983-09-29T01:45:39.0520000Z,L1TP,117,31,2.0,42.79515,40.7823,124.88634,127.85668,27769529,gs://gcp-public-data-landsat/LM04/01/117/031/LM04_L1TP_117031_19830929_20180412_01_T2\n", + "LM10890151972214AAA05,LM01_L1GS_089015_19720801_20180428_01_T2,LANDSAT_1,MSS,1972-08-01,01,T2,1972-08-01T22:10:17.7940000Z,L1GS,89,15,0.0,65.211,62.9963,-170.33714,-165.11701,16228538,gs://gcp-public-data-landsat/LM01/01/089/015/LM01_L1GS_089015_19720801_20180428_01_T2\n", + "LC80660912015026LGN02,LC08_L1GT_066091_20150126_20180202_01_T2,LANDSAT_8,OLI_TIRS,2015-01-26,01,T2,2015-01-26T21:24:43.3704780Z,L1GT,66,91,94.98,-43.51716,-45.68406,-177.72298,-174.66884,1075234161,gs://gcp-public-data-landsat/LC08/01/066/091/LC08_L1GT_066091_20150126_20180202_01_T2\n" + ] 
+ } + ], + "source": [ + "head --lines=4 data/index.csv" + ] + }, + { + "cell_type": "markdown", + "id": "532e6da3-302a-4e8a-8570-752995f30f1d", + "metadata": {}, + "source": [ + "## Search for Data\n", + "\n", + "We can see the data is well formed and matches what we expect. We will now use this index to download the data that covers a specific point and was captured by Landsat 8. The following script does a simple filter." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c5e300c3-e1f3-4cd4-9679-77725e61c4db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#!/usr/bin/python3\n", + "import json\n", + "import csv\n", + "import sys\n", + "\n", + "# Example: Burr Oak Tree\n", + "# 38.899313,-92.464562 (Lat north+, Long west-) ; Landsat Path 025, Row 033\n", + "config=json.load(open(\"search.json\"))\n", + "lat,lon=config['lat'],config['lon']\n", + "landsat=config['landsat']\n", + "\n", + "reader=csv.reader(sys.stdin)\n", + "header=next(reader) # skip header\n", + "for l in reader:\n", + " SCENE_ID,PRODUCT_ID,SPACECRAFT_ID,SENSOR_ID,DATE_ACQUIRED,COLLECTION_NUMBER,COLLECTION_CATEGORY,SENSING_TIME,DATA_TYPE,WRS_PATH,WRS_ROW,CLOUD_COVER,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,TOTAL_SIZE,BASE_URL=l\n", + " west,east=float(WEST_LON),float(EAST_LON)\n", + " north,south=float(NORTH_LAT),float(SOUTH_LAT)\n", + " if SPACECRAFT_ID==landsat and north >= lat and south <= lat and west <= lon and east >= lon:\n", + " print(BASE_URL) # output BASE_URL\n" + ] + } + ], + "source": [ + "cat search.py" + ] + }, + { + "cell_type": "markdown", + "id": "4aa3de47-3dd4-4a0f-9f07-f2f004de7054", + "metadata": {}, + "source": [ + "We can see that the actual search data comes from the file `search.json`. The program reads the data from the standard input and iterates over all rows in the CSV file. It filters for the rows whose image contains the point and prints out the bucket URL for each of them. 
We are interested in all products that contain the Burr Oak Tree." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c9872510-4265-4b0e-aeb5-5a829ff69b24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"lat\": 38.899313,\n", + " \"lon\": -92.464562,\n", + " \"landsat\": \"LANDSAT_8\"\n", + "}\n" + ] + } + ], + "source": [ + "cat search.json" + ] + }, + { + "cell_type": "markdown", + "id": "cbb27235-6bc4-4eb6-b668-5c30427a28b8", + "metadata": {}, + "source": [ + "Now let's test this on a subset of the data." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "6912a9ec-0f9b-4500-ba20-d4280592b323", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n", + "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n" + ] + } + ], + "source": [ + "head --lines=100000 data/index.csv | python3 search.py" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3572c518-df83-4906-bfa6-a37bde2a5063", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#!/bin/bash\n", + "\n", + "# Read space separated URL from STDIN and download \n", + "while read -r URL ; do\n", + " echo \"+++ $URL\"\n", + " # -m parallel\n", + " # -n no-clobber (do not re-download data)\n", + " # -r recursive (download all the data in the specified URL)\n", + " gsutil -m cp -n -r \"${URL}/\" data/\n", + "done\n" + ] + } + ], + "source": [ + "cat download.sh" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cccec3e1-0dcd-4e3b-a059-a884f5219b66", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n", 
+ "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_ANG.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B1.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B11.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B10.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B2.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B8.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B9.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_BQA.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B3.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B4.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B6.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B5.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_MTL.txt...\n", + "Copying 
gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B7.TIF...\n", + "- [14/14 files][ 1021 MiB/ 1021 MiB] 100% Done \n", + "Operation completed over 14 objects/1021.8 MiB. \n", + "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_ANG.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B10.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B1.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B2.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B11.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B8.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B4.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_MTL.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B9.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B3.TIF...\n", + "Copying 
gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B7.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B6.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_BQA.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B5.TIF...\n", + "- [14/14 files][ 1.0 GiB/ 1.0 GiB] 100% Done \n", + "Operation completed over 14 objects/1.0 GiB. \n" + ] + } + ], + "source": [ + "head --lines=100000 data/index.csv | python3 search.py | bash download.sh" + ] } ], "metadata": { diff --git a/scripts/gcp-create.sh b/scripts/gcp-create.sh index 36dd725..f1cc80a 100755 --- a/scripts/gcp-create.sh +++ b/scripts/gcp-create.sh @@ -8,7 +8,8 @@ NAME=learner VM=essentials ZONE=us-west2-c PROJECT=CLASS-Essentials -REPO="git@github.internet2.edu:CLASS/${PROJECT}.git" +GITHUB=github.internet2.edu +REPO="git@${GITHUB}:CLASS/${PROJECT}.git" echo "=== gcp-dev.sh $PROJECT $BRANCH" @@ -25,14 +26,13 @@ echo "+++ configuring VM" gcloud compute ssh --zone=$ZONE $NAME@$VM --ssh-flag='-A' < .ssh/known_hosts +ssh-keyscan ${GITHUB} > .ssh/known_hosts git config --global color.ui auto git config --global push.default simple git config --global pull.ff only +git config --global user.name "$(git config user.name)" +git config --global user.email "$(git config user.name)" git clone --branch $BRANCH $REPO -cd $PROJECT -git config user.name "$(git config user.name)" -git config user.email "$(git config user.name)" EOF echo "+++ configure local ssh"