From bdcc3d25143fcb5c1844d87644889f458089989b Mon Sep 17 00:00:00 2001 From: Timothy Middelkoop Date: Mon, 13 Dec 2021 15:16:06 +0000 Subject: [PATCH] Add back anallysis from failed merge --- content/GCP/06_running_analysis.ipynb | 338 +++++++++++++++++++------- 1 file changed, 247 insertions(+), 91 deletions(-) diff --git a/content/GCP/06_running_analysis.ipynb b/content/GCP/06_running_analysis.ipynb index 0ba3926..654a9cf 100644 --- a/content/GCP/06_running_analysis.ipynb +++ b/content/GCP/06_running_analysis.ipynb @@ -32,7 +32,7 @@ "\n", "First login to the instance from the Cloud Shell by running the following command:\n", "```\n", - "gcloud compute ssh instance-1\n", + "gcloud compute ssh essentials\n", "```\n", "If prompted for a zone select `n` to find it automatically. You can see an example session below." ] @@ -43,11 +43,11 @@ "metadata": {}, "source": [ "```\n", - "student31@cloudshell:~ (just-armor-301114)$ gcloud compute ssh instance-1\n", - "Did you mean zone [us-central1-b] for instance: [instance-1] (Y/n)? n\n", + "learner@cloudshell:~ (just-armor-301114)$ gcloud compute ssh essentials\n", + "Did you mean zone [us-central1-b] for instance: [essentials] (Y/n)? n\n", "\n", - "No zone specified. Using zone [us-west2-c] for instance: [instance-1].\n", - "Linux instance-1 4.19.0-18-cloud-amd64 #1 SMP Debian 4.19.208-1 (2021-09-29) x86_64\n", + "No zone specified. 
Using zone [us-west2-c] for instance: [essentials].\n", + "Linux essentials 4.19.0-18-cloud-amd64 #1 SMP Debian 4.19.208-1 (2021-09-29) x86_64\n", "\n", "The programs included with the Debian GNU/Linux system are free software;\n", "the exact distribution terms for each program are described in the\n", @@ -56,10 +56,49 @@ "Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent\n", "permitted by applicable law.\n", "Last login: Tue Nov 9 20:12:49 2021 from 34.133.99.196\n", - "student31@instance-1:~$\n", + "learner@essentials:~$\n", "```\n" ] }, + { + "cell_type": "markdown", + "id": "0ab6ad28-24bf-4c0f-b4dd-3be2ec25aaa7", + "metadata": {}, + "source": [ + "## Secure the VM\n", + "\n", + "We first make sure that the VM is up to date with the latest security patches by running the following commands" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0f2df994-202e-4e3e-96c2-1536d8adb036", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hit:1 http://security.debian.org/debian-security buster/updates InRelease\n", + "Hit:2 http://deb.debian.org/debian buster InRelease \u001b[0m\n", + "Hit:3 http://deb.debian.org/debian buster-updates InRelease \n", + "Hit:4 http://deb.debian.org/debian buster-backports InRelease \n", + "Hit:5 http://packages.cloud.google.com/apt cloud-sdk-buster InRelease\n", + "Hit:6 http://packages.cloud.google.com/apt google-cloud-packages-archive-keyring-buster InRelease\n", + "Hit:7 http://packages.cloud.google.com/apt google-compute-engine-buster-stable InRelease\n", + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "5 packages can be upgraded. 
Run 'apt list --upgradable' to see them.\n" + ] + } + ], + "source": [ + "sudo apt update\n", + "sudo unattended-upgrades" + ] + }, { "cell_type": "markdown", "id": "b7c4db9e-f098-41bc-80d2-b524444eec7f", @@ -72,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "05fadd30-4f35-4a93-b713-cc132131b948", "metadata": {}, "outputs": [ @@ -83,19 +122,8 @@ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", - "Suggested packages:\n", - " git-daemon-run | git-daemon-sysvinit git-doc git-el git-email git-gui gitk\n", - " gitweb git-cvs git-mediawiki git-svn\n", - "The following NEW packages will be installed:\n", - " git\n", - "0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.\n", - "Need to get 0 B/5633 kB of archives.\n", - "After this operation, 36.2 MB of additional disk space will be used.\n", - "Selecting previously unselected package git.\n", - "(Reading database ... 56121 files and directories currently installed.)\n", - "Preparing to unpack .../git_1%3a2.20.1-2+deb10u3_amd64.deb ...\n", - "Unpacking git (1:2.20.1-2+deb10u3) ...\n", - "Setting up git (1:2.20.1-2+deb10u3) ...\n" + "git is already the newest version (1:2.20.1-2+deb10u3).\n", + "0 upgraded, 0 newly installed, 0 to remove and 5 not upgraded.\n" ] } ], @@ -105,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "96db6a66-3fbf-419a-b8c8-dbb27639e990", "metadata": {}, "outputs": [], @@ -115,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "36554c99-ba08-4733-8ef2-e68d42d0d2b7", "metadata": {}, "outputs": [ @@ -124,11 +152,9 @@ "output_type": "stream", "text": [ "Cloning into 'CLASS-Examples'...\n", - "remote: Enumerating objects: 23, done.\u001b[K\n", - "remote: Counting objects: 100% (23/23), done.\u001b[K\n", - "remote: Compressing objects: 100% (18/18), done.\u001b[K\n", - "remote: Total 48 (delta 8), reused 20 
(delta 5), pack-reused 25\u001b[K\n", - "Unpacking objects: 100% (48/48), done.\n" + "remote: Enumerating objects: 54, done.\u001b[K\n", + "remote: Total 54 (delta 0), reused 0 (delta 0), pack-reused 54\u001b[K\n", + "Unpacking objects: 100% (54/54), done.\n" ] } ], @@ -138,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "90c1cda7-60d4-44bb-84f8-e776a77a94ab", "metadata": {}, "outputs": [], @@ -148,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "55b628d5-6e5c-45a5-9cd3-c129db9cdcd2", "metadata": {}, "outputs": [ @@ -156,13 +182,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "total 24\n", - "-rw-r--r-- 1 learner learner 862 Nov 10 22:31 ReadMe.md\n", - "-rw-r--r-- 1 learner learner 72 Nov 10 22:31 clean.sh\n", - "-rw-r--r-- 1 learner learner 280 Nov 10 22:31 download.sh\n", - "-rw-r--r-- 1 learner learner 314 Nov 10 22:31 get-index.sh\n", - "-rw-r--r-- 1 learner learner 76 Nov 10 22:31 search.json\n", - "-rw-r--r-- 1 learner learner 783 Nov 10 22:31 search.py\n" + "total 28\n", + "-rw-r--r-- 1 learner learner 960 Dec 13 15:11 ReadMe.md\n", + "-rw-r--r-- 1 learner learner 72 Dec 13 15:11 clean.sh\n", + "-rw-r--r-- 1 learner learner 514 Dec 13 15:11 combine.py\n", + "-rw-r--r-- 1 learner learner 280 Dec 13 15:11 download.sh\n", + "-rw-r--r-- 1 learner learner 314 Dec 13 15:11 get-index.sh\n", + "-rw-r--r-- 1 learner learner 76 Dec 13 15:11 search.json\n", + "-rw-r--r-- 1 learner learner 783 Dec 13 15:11 search.py\n" ] } ], @@ -182,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "e56ab74a-ae6d-4602-a26b-4a2656bd40cd", "metadata": {}, "outputs": [ @@ -219,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "bbe85b75-c7cd-40ed-a3b0-37cbd0a5f52e", "metadata": {}, "outputs": [ @@ -237,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": 
"18a9b71c-5871-4ce2-a202-b48ad04e8d38", "metadata": {}, "outputs": [ @@ -246,8 +273,13 @@ "output_type": "stream", "text": [ "Copying gs://gcp-public-data-landsat/index.csv.gz...\n", - "- [1 files][757.2 MiB/757.2 MiB] 54.0 MiB/s \n", - "Operation completed over 1 objects/757.2 MiB. \n" + "==> NOTE: You are downloading one or more large file(s), which would \n", + "run significantly faster if you enabled sliced object downloads. This\n", + "feature is enabled by default but requires that compiled crcmod be\n", + "installed (see \"gsutil help crcmod\").\n", + "\n", + "/ [1 files][760.7 MiB/760.7 MiB] \n", + "Operation completed over 1 objects/760.7 MiB. \n" ] } ], @@ -255,9 +287,17 @@ "gsutil cp gs://gcp-public-data-landsat/index.csv.gz data/" ] }, + { + "cell_type": "markdown", + "id": "76b905b4-1c2a-4960-a14f-974b77f671cd", + "metadata": {}, + "source": [ + "We will now uncompress the index file to make it easier to use. This may take some time depending on the machine type you are using." 
+ ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "2cdaf24c-c4aa-4e80-9236-939e7c982916", "metadata": {}, "outputs": [], @@ -267,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "b005876c-f9af-43d6-80c6-f04295413b9b", "metadata": {}, "outputs": [ @@ -276,7 +316,7 @@ "output_type": "stream", "text": [ "total 2.6G\n", - "-rw-r--r-- 1 learner learner 2.6G Nov 10 22:32 index.csv\n" + "-rw-r--r-- 1 learner learner 2.6G Dec 13 15:11 index.csv\n" ] } ], @@ -294,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "ffe969db-d207-44fe-8957-8d129c76ee8f", "metadata": {}, "outputs": [ @@ -303,9 +343,9 @@ "output_type": "stream", "text": [ "SCENE_ID,PRODUCT_ID,SPACECRAFT_ID,SENSOR_ID,DATE_ACQUIRED,COLLECTION_NUMBER,COLLECTION_CATEGORY,SENSING_TIME,DATA_TYPE,WRS_PATH,WRS_ROW,CLOUD_COVER,NORTH_LAT,SOUTH_LAT,WEST_LON,EAST_LON,TOTAL_SIZE,BASE_URL\n", - "LM41170311983272FFF03,LM04_L1TP_117031_19830929_20180412_01_T2,LANDSAT_4,MSS,1983-09-29,01,T2,1983-09-29T01:45:39.0520000Z,L1TP,117,31,2.0,42.79515,40.7823,124.88634,127.85668,27769529,gs://gcp-public-data-landsat/LM04/01/117/031/LM04_L1TP_117031_19830929_20180412_01_T2\n", - "LM10890151972214AAA05,LM01_L1GS_089015_19720801_20180428_01_T2,LANDSAT_1,MSS,1972-08-01,01,T2,1972-08-01T22:10:17.7940000Z,L1GS,89,15,0.0,65.211,62.9963,-170.33714,-165.11701,16228538,gs://gcp-public-data-landsat/LM01/01/089/015/LM01_L1GS_089015_19720801_20180428_01_T2\n", - "LC80660912015026LGN02,LC08_L1GT_066091_20150126_20180202_01_T2,LANDSAT_8,OLI_TIRS,2015-01-26,01,T2,2015-01-26T21:24:43.3704780Z,L1GT,66,91,94.98,-43.51716,-45.68406,-177.72298,-174.66884,1075234161,gs://gcp-public-data-landsat/LC08/01/066/091/LC08_L1GT_066091_20150126_20180202_01_T2\n" + 
"LE71730322004290ASN01,LE07_L1TP_173032_20041016_20170118_01_T1,LANDSAT_7,ETM,2004-10-16,01,T1,2004-10-16T07:51:09.5270348Z,L1TP,173,32,31.0,41.30611,39.3474,38.02284,40.86797,207913942,gs://gcp-public-data-landsat/LE07/01/173/032/LE07_L1TP_173032_20041016_20170118_01_T1\n", + "LT51320261997215BJC00,LT05_L1TP_132026_19970803_20161230_01_T1,LANDSAT_5,TM,1997-08-03,01,T1,1997-08-03T03:17:51.5040130Z,L1TP,132,26,63.0,49.83572,47.82939,104.30052,107.6158,136383230,gs://gcp-public-data-landsat/LT05/01/132/026/LT05_L1TP_132026_19970803_20161230_01_T1\n", + "LT51510262008107BJC01,LT05_L1GS_151026_20080416_20180611_01_T2,LANDSAT_5,TM,2008-04-16,01,T2,2008-04-16T05:33:27.4160940Z,L1GS,151,26,87.0,49.86189,47.86585,74.77182,78.13782,141793569,gs://gcp-public-data-landsat/LT05/01/151/026/LT05_L1GS_151026_20080416_20180611_01_T2\n" ] } ], @@ -325,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "c5e300c3-e1f3-4cd4-9679-77725e61c4db", "metadata": {}, "outputs": [ @@ -369,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "c9872510-4265-4b0e-aeb5-5a829ff69b24", "metadata": {}, "outputs": [ @@ -399,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "6912a9ec-0f9b-4500-ba20-d4280592b323", "metadata": {}, "outputs": [ @@ -407,18 +447,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n", - "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n" + "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2\n", + "gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2\n" ] } ], "source": [ - "head --lines=100000 data/index.csv | python3 search.py" + "head --lines=200000 data/index.csv | python3 search.py" + ] + }, + { + "cell_type": "markdown", + "id": 
"a76f24f8-3b2d-4c0d-880b-f2911b9d9b84", + "metadata": {}, + "source": [ + "## Download the Data\n", + "\n", + "Now that we have a list of folders we are interested in, we will download them with a simple script that takes bucket addresses (URLs) and downloads them with the `gsutil` program." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "3572c518-df83-4906-bfa6-a37bde2a5063", "metadata": {}, "outputs": [ @@ -443,9 +493,17 @@ "cat download.sh" ] }, + { + "cell_type": "markdown", + "id": "a02100bd-f8c5-42dd-975e-a9eb5369bc81", + "metadata": {}, + "source": [ + "Get the first 2 datasets" + ] + }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "cccec3e1-0dcd-4e3b-a059-a884f5219b66", "metadata": { "scrolled": true, @@ -456,45 +514,143 @@ "name": "stdout", "output_type": "stream", "text": [ - "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_ANG.txt...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B1.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B11.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B10.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B2.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B8.TIF...\n", - "Copying
gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B9.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_BQA.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B3.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B4.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B6.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B5.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_MTL.txt...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20160521_20170223_01_T1/LC08_L1TP_025033_20160521_20170223_01_T1_B7.TIF...\n", - "- [14/14 files][ 1021 MiB/ 1021 MiB] 100% Done \n", - "Operation completed over 14 objects/1021.8 MiB. 
\n", - "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_ANG.txt...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B10.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B1.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B2.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B11.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B8.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B4.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_MTL.txt...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B9.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B3.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B7.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B6.TIF...\n", - "Copying 
gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_BQA.TIF...\n", - "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1TP_025033_20171218_20171224_01_T1/LC08_L1TP_025033_20171218_20171224_01_T1_B5.TIF...\n", - "- [14/14 files][ 1.0 GiB/ 1.0 GiB] 100% Done \n", - "Operation completed over 14 objects/1.0 GiB. \n" + "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B1.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_ANG.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B10.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B4.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B8.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B11.TIF...\n", + "==> NOTE: You are downloading one or more large file(s), which would \n", + "run significantly faster if you enabled sliced object downloads. 
This\n", + "feature is enabled by default but requires that compiled crcmod be\n", + "installed (see \"gsutil help crcmod\").\n", + "\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B9.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B5.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B3.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_BQA.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B7.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B2.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_B6.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20210127_20210305_01_T2/LC08_L1GT_025033_20210127_20210305_01_T2_MTL.txt...\n", + "| [14/14 files][841.7 MiB/841.7 MiB] 100% Done \n", + "Operation completed over 14 objects/841.7 MiB. 
\n", + "+++ gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_ANG.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B1.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B10.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B11.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B2.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B9.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B4.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B5.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B3.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B8.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_BQA.TIF...\n", + "==> NOTE: You are downloading one or more large file(s), which would\n", + "run significantly faster if you enabled sliced object downloads. 
This\n", + "feature is enabled by default but requires that compiled crcmod be\n", + "installed (see \"gsutil help crcmod\").\n", + "\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_MTL.txt...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B6.TIF...\n", + "Copying gs://gcp-public-data-landsat/LC08/01/025/033/LC08_L1GT_025033_20160215_20170224_01_T2/LC08_L1GT_025033_20160215_20170224_01_T2_B7.TIF...\n", + "/ [14/14 files][976.2 MiB/976.2 MiB] 100% Done \n", + "Operation completed over 14 objects/976.2 MiB. \n" + ] + } + ], + "source": [ + "head --lines=200000 data/index.csv | python3 search.py | head --lines=2 | bash download.sh" + ] + }, + { + "cell_type": "markdown", + "id": "14c23aab-a6e8-439d-8e55-44b7148f74d5", + "metadata": {}, + "source": [ + "Check that the data was downloaded" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a37c1567-14b5-4dc7-bc27-d1b84411fce1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 2664744\n", + "drwxr-xr-x 2 learner learner 4096 Dec 13 15:13 \u001b[0m\u001b[01;34mLC08_L1GT_025033_20160215_20170224_01_T2\u001b[0m\n", + "drwxr-xr-x 2 learner learner 4096 Dec 13 15:12 \u001b[01;34mLC08_L1GT_025033_20210127_20210305_01_T2\u001b[0m\n", + "-rw-r--r-- 1 learner learner 2728684265 Dec 13 15:11 index.csv\n" + ] + } + ], + "source": [ + "ls -l data" + ] + }, + { + "cell_type": "markdown", + "id": "c0257075-537c-4510-bafd-72e9756db17b", + "metadata": {}, + "source": [ + "## Exporting the Results\n", + "\n", + "Now that we have the data, we will create a bucket to place the results. We will first create a bucket with a reasonable set of options.\n", + "\n", + "We first store the bucket name in the `BUCKET` environment variable for future use.
This time we will specify a realistic set of options for a private bucket used for computation.\n", + "\n", + "Options (run `gsutil mb --help` for more information):\n", + " * `-b on` specifies uniform bucket-level access.\n", + " * `-l $REGION` puts the data in a specific region for lower cost and lower latency.\n", + " * `--pap enforced` turns on public access prevention to help keep data private. \n", + " \n", + "Uniform bucket-level access (Bucket Policy Only enabled: true) applies the data access permissions to the bucket as a whole through IAM, instead of through access control lists (ACLs) on each object in the bucket. This makes the permissions obvious and makes security much more predictable.\n", + " \n", + "As usual, we must set our environment. In this case we also set a `REGION` environment variable to indicate where in the world we want the data to be stored.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f36cb8c5-f305-4cb2-a5cc-0c9fd8592fb4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bucket: essentials-learner-2021-12-13 region: us-west2\n" + ] + } + ], + "source": [ + "BUCKET=\"essentials-${USER}-$(date +%F)\"\n", + "REGION=\"us-west2\"\n", + "echo \"bucket: $BUCKET region: $REGION\"" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c2ae2b74-5e93-4c55-8bd7-63337f7dcbb8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating gs://essentials-learner-2021-12-13/...\n" ] } ], "source": [ - "head --lines=100000 data/index.csv | python3 search.py | bash download.sh" + "gsutil mb -b on -l $REGION --pap enforced \"gs://$BUCKET\"" ] } ],