#!/usr/bin/env python3
# From 44e1ac6931b5cc2728be6c083e1f8a44a1ce7b6b Mon Sep 17 00:00:00 2001
# From: John Hicks
# Date: Mon, 24 Oct 2022 16:43:10 -0400
# Subject: [PATCH] change landsat dataset
#
# reference - https://planetarycomputer.microsoft.com/docs/concepts/sas/
# source - https://github.com/microsoft/AIforEarthDataSets/blob/main/data/landsat-7.ipynb
# get key: curl https://planetarycomputer.microsoft.com/api/sas/v1/token/landsateuwest/landsat-c2 | cut -d '"' -f 8 > file

import os
import datetime
import numpy as np
import matplotlib.pyplot as plt

import rasterio
import rasterio.features
from rasterio.windows import Window
from pyproj import Transformer

from azure.storage.blob import ContainerClient
from cmr import GranuleQuery

from lxml import etree
import unicodedata

# Query location: an area near Shizuoka, Japan.
query_lat = 34.60161789420915
query_lon = 138.2283853469587

# CMR collection short name for Landsat 7 ETM+ Collection 1.
query_short_name = 'Landsat7_ETM_Plus_C1'

query_start_date = datetime.datetime(2020, 1, 1, 0, 0, 0)
query_end_date = datetime.datetime(2020, 12, 1, 0, 0, 0)

# Text file with a SAS token for the Landsat container on the first line.
sas_file = os.path.expanduser('./tokens/landsat_ro_sas.txt')

# Maps instrument names as they appear in CMR results to the short names
# used in the Azure blob folder layout.
instrument_mapping = {
    'Landsat 7 Enhanced Thematic Mapper Plus (ETM+) Collection 1 V1': 'etm',
}

# Bands for the RGB composite we're going to make (ETM+ natural color).
rgb_bands = ['B3', 'B2', 'B1']

# Normalization value for rendering.
composite_norm_value = 25000

# Which overview to use for thumbnail rendering (several are available
# for Landsat C2 COGs; -3 selects a small-ish one from the end).
thumbnail_index = -3

# When rendering whole images, how much should we downscale?
dsfactor = 10

# We're going to render a nice image at the end: set the size and "zoom".
target_size = [800, 400]
oversample = 3.0

# Read the SAS token.  NOTE(review): the original used `assert` for this
# validation, which is silently stripped under `python -O`; raise instead.
with open(sas_file, 'r') as f:
    lines = f.readlines()
if not lines:
    raise ValueError('SAS token file {} is empty'.format(sas_file))
sas_token = lines[0].strip()

storage_account_name = 'landsateuwest'
container_name = 'landsat-c2'
storage_account_url = 'https://' + storage_account_name + '.blob.core.windows.net/'

container_client = ContainerClient(account_url=storage_account_url,
                                   container_name=container_name,
                                   credential=sas_token)

# First query: default json results, used later to build blob paths.
api = GranuleQuery()
json_granules = (api.short_name(query_short_name)
                 .point(query_lon, query_lat)
                 .temporal(query_start_date, query_end_date)
                 .get())

print('Found {} granules'.format(len(json_granules)))

# Second query: echo10 format, which is less convenient than the default
# .json, but includes cloud cover information, which the .json results
# don't (for Landsat).
api = GranuleQuery()
api.parameters(
    short_name=query_short_name,
    point=(query_lon, query_lat),
    temporal=(query_start_date, query_end_date)
)
xml_results = api.format('echo10').get(len(json_granules))

# echo10 payloads can contain non-ascii characters; normalize and strip
# them before handing the document to the XML parser.
xml = xml_results[0]
xml = unicodedata.normalize('NFKD', xml).encode('ascii', 'ignore')
tree = etree.fromstring(xml)
xml_granules = tree.findall("result/Granule")

# Scan the granules for the lowest land cloud cover.
lowest_cloud_cover = None
granule_name = None

for granule in xml_granules:
    attributes = granule.find('AdditionalAttributes')
    for a in attributes:
        name = a.find('Name').text
        if name == 'LandCloudCover' or name == 'LandCloudCoverPct':
            cc = float(a.find('Values').find('Value').text)
            if lowest_cloud_cover is None or cc < lowest_cloud_cover:
                lowest_cloud_cover = cc
                granule_name = granule.find('GranuleUR').text
            # Each granule carries one cloud-cover attribute; stop scanning
            # the remaining attributes once it is found.
            break
# Find the json-formatted granule with the same name as the least-cloudy
# echo10 granule selected above.  NOTE(review): the original used `assert`
# here, which `python -O` strips; raise explicitly instead.
matching = [g for g in json_granules if g['title'] == granule_name]
if len(matching) != 1:
    raise ValueError('Expected exactly one granule named {!r}, found {}'.format(
        granule_name, len(matching)))
granule = matching[0]

# E.g. 'LE07_L1TP_074086_20200328_20200424_01_T1'
granule_id = granule['title']
print('Accessing tile {}'.format(granule_id))

level = 'level-2'
category = 'standard'
sensor = instrument_mapping[granule['dataset_id']]

# Acquisition timestamp, e.g. 2020-01-03T19:01:46.557Z
date = granule['time_start']
year = date[0:4]
month = date[5:7]
day = date[8:10]

# WRS-2 path/row are embedded in the granule ID ('074' and '086' above).
path = granule_id[10:13]
row = granule_id[13:16]

row_folder = '/'.join([level, category, sensor, year, path, row])

# NOTE(review): removed a dead block that sliced granule_id[11:19] into
# month/day/year -- the slice does not match the claimed MMDDYYYY layout
# (for the example ID it yields '4086_202') and the results were never used.

# E.g. LE07_L1TP_074086_20200328; swap the processing level to L2SP since
# this script reads Collection-2 Level-2 products.
scene_prefix = granule_id[0:25]
scene_prefix = scene_prefix[0:5] + 'L2SP' + scene_prefix[9:]

azure_scene_prefix = row_folder + '/' + scene_prefix

generator = container_client.list_blobs(name_starts_with=azure_scene_prefix)
image_paths = [blob.name for blob in generator if blob.name.endswith('.TIF')]

print('Found {} images:'.format(len(image_paths)))
for fn in image_paths:
    print(fn.split('/')[-1])

# Signed (SAS) read URLs for each image.
azure_urls = [storage_account_url + container_name + '/' + p + '?' + sas_token
              for p in image_paths]

# Pick the composite band URLs; band choice is from
# https://www.usgs.gov/media/images/common-landsat-band-rgb-composites
# (rgb_bands is defined once, near the top of the file).
rgb_urls = []
for band_name in rgb_bands:
    rgb_urls.append([s for s in azure_urls if band_name + '.TIF' in s][0])

# --- Pass 1: thumbnail composite built from the COG overviews --------------
# Technique from:
# https://automating-gis-processes.github.io/CSC/notebooks/L5/read-cogs.html
thumbnail_data = []

for url in rgb_urls:
    with rasterio.open(url) as raster:
        # List of overviews from biggest to smallest.
        oviews = raster.overviews(1)
        # Retrieve a small-ish thumbnail.
        decimation_level = oviews[thumbnail_index]
        h = int(raster.height / decimation_level)
        w = int(raster.width / decimation_level)
        thumbnail_channel = raster.read(1, out_shape=(1, h, w)) / composite_norm_value
        thumbnail_data.append(thumbnail_channel)

rgb1 = np.dstack((thumbnail_data[0], thumbnail_data[1], thumbnail_data[2]))
np.clip(rgb1, 0, 1, rgb1)

dpi = 100; fig = plt.figure(frameon=False, figsize=(w/dpi, h/dpi), dpi=dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.]); ax.set_axis_off(); fig.add_axes(ax)

plt.imshow(rgb1);
#plt.show()
#plt.imsave('test.png', rgb1)

# --- Pass 2: downsampled full-scene composite ------------------------------
image_data = []

for fn in rgb_urls:
    with rasterio.open(fn, 'r') as raster:
        h = int(raster.height / dsfactor)
        w = int(raster.width / dsfactor)
        band_array = raster.read(1, out_shape=(1, h, w))
    # NOTE(review): dropped a redundant raster.close() -- the `with` block
    # already closes the dataset.
    band_array = band_array / composite_norm_value
    image_data.append(band_array)

rgb2 = np.dstack((image_data[0], image_data[1], image_data[2]))
np.clip(rgb2, 0, 1, rgb2)

dpi = 100; fig = plt.figure(frameon=False, figsize=(w/dpi, h/dpi), dpi=dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.]); ax.set_axis_off(); fig.add_axes(ax)

plt.imshow(rgb2);
#plt.show()
# --- Pass 3: oversampled crop centered on the query point ------------------
image_data = []

for fn in rgb_urls:
    with rasterio.open(fn, 'r') as src:
        # Choose the target window size in source pixels.  NOTE(review):
        # cast to int -- target_size * oversample is a float and Window
        # offsets/sizes should be integral here.
        xsize = int(target_size[0] * oversample)
        ysize = int(target_size[1] * oversample)

        # Map the query lat/lon into this scene's CRS, then to pixel coords.
        transformer = Transformer.from_crs('EPSG:4326', src.crs, always_xy=True)
        xx, yy = transformer.transform(query_lon, query_lat)
        py, px = src.index(xx, yy)

        # Center the read window on the query point.
        xoff = px - xsize // 2
        yoff = py - ysize // 2
        window = Window(xoff, yoff, xsize, ysize)

        band_array = src.read(1, window=window)
    # NOTE(review): dropped a redundant src.close() -- handled by `with`.
    band_array = band_array / composite_norm_value
    image_data.append(band_array)

rgb3 = np.dstack((image_data[0], image_data[1], image_data[2]))
np.clip(rgb3, 0, 1, rgb3)

w = target_size[0]
h = target_size[1]

dpi = 50; fig = plt.figure(frameon=False, figsize=(w/dpi, h/dpi), dpi=dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.]); ax.set_axis_off(); fig.add_axes(ax)

plt.imshow(rgb3);
plt.show()

# ===========================================================================
# Patch continues: azure-landsat/landsat-hls-azure.py is DELETED
# (deleted file mode 100755, index 088022c..0000000).  The removed script
# follows; it originally came from the jupyter notebook at
# https://nbviewer.org/github/microsoft/AIforEarthDataSets/blob/main/data/hls.ipynb
# ===========================================================================

# Environment setup
import requests
import numpy as np
import io
import matplotlib.pyplot as plt
import pandas as pd

import rasterio
from azure.storage.blob import ContainerClient

container_name = 'hls'
storage_account_name = 'hlssa'
storage_account_url = 'https://' + storage_account_name + '.blob.core.windows.net/'
hls_blob_root = storage_account_url + container_name

# This file is provided by NASA; it indicates the lat/lon extents of each
# hls tile.
#
# The file originally comes from:
#
# https://hls.gsfc.nasa.gov/wp-content/uploads/2016/10/S2_TilingSystem2-1.txt
#
# ...but as of 8/2019, there is a bug with the column names in the original
# file, so we access a copy with corrected column names.
hls_tile_extents_url = 'https://ai4edatasetspublicassets.blob.core.windows.net/assets/S2_TilingSystem2-1.txt'

# Load this file into a table, where each row is:
#
# Tile ID, Xstart, Ystart, UZ, EPSG, MinLat, MaxLat, MinLon, MaxLon
#
# NOTE(review): the original comment listed "MinLon, MaxLon" twice; the
# columns actually read below are MinLat/MaxLat/MinLon/MaxLon.
s = requests.get(hls_tile_extents_url).content
hls_tile_extents = pd.read_csv(io.StringIO(s.decode('utf-8')), delimiter=r'\s+')
#print('Read tile extents for {} tiles'.format(len(hls_tile_extents)))

# The hls container is public, so no credential is needed.
hls_container_client = ContainerClient(account_url=storage_account_url,
                                       container_name=container_name,
                                       credential=None)

# Functions

def list_available_tiles(prefix):
    """
    List all blobs in an Azure blob container matching a prefix.

    We'll use this to query tiles by location and year.
    """
    files = []
    generator = hls_container_client.list_blobs(name_starts_with=prefix)
    for blob in generator:
        files.append(blob.name)
    return files


def lat_lon_to_hls_tile_id(lat, lon, tile_extents=None):
    """
    Get the hls tile ID for a given lat/lon coordinate pair.

    :param lat: latitude in degrees
    :param lon: longitude in degrees
    :param tile_extents: optional DataFrame with MinLat/MaxLat/MinLon/MaxLon
        and TilID columns; defaults to the module-level hls_tile_extents
        table loaded above.
    :return: the tile ID string, or None if no tile contains the point.
    """
    if tile_extents is None:
        tile_extents = hls_tile_extents
    for _, row in tile_extents.iterrows():
        if row.MinLat <= lat <= row.MaxLat and row.MinLon <= lon <= row.MaxLon:
            # 'TilID' (sic) is the actual column name in the extents file.
            return row.TilID
    return None

# Build a tile path from components, including lat/lon lookup
# Near Bear Lake, UT
lat = 41.89655047211277; lon = -111.4132464403312

tile_id = lat_lon_to_hls_tile_id(lat, lon)
#print('Using Tile ID {}'.format(tile_id))

year = '2019'
daynum = '001'   # 1-indexed day-of-year
product = 'S30'  # 'S30' for Sentinel, 'L30' for Landsat
sband = '_01'
version = 'v1.4' # Currently always v1.4

#blob_name = product + '/HLS.' + product + '.T' + tile_id + '.' + year + daynum + '.' + version \
#    + sband + '.tif'
#sentinel_url = hls_blob_root + '/' + blob_name
#print('Constructed tile URL:\n{}'.format(sentinel_url))

# Display an RGB composite of the Sentinel-2 image we just chose
# Bands 2, 3, and 4 are B, G, and R in Sentinel-2 HLS images
composite_bands = [4,3,2]

#urls = [sentinel_url.replace(sband,'_' + str(b).zfill(2)) for b in composite_bands]
#print('Rendering URLs:')
#for s in urls:
#    print(s)

#image_data = []

composite_downsample_factor = 5
composite_norm_value = 15000

# Sentinel-2 rendering loop, retained (commented out) from the source notebook:
#for fn in urls:
#    with rasterio.open(fn,'r') as raster:
#        h = int(raster.height/composite_downsample_factor)
#        w = int(raster.width/composite_downsample_factor)
#        band_array = raster.read(1, out_shape=(1, h, w))
#        band_array = band_array / composite_norm_value
#        image_data.append(band_array)
#
#rgb = np.dstack((image_data[0],image_data[1],image_data[2]))
#np.clip(rgb,0,1,rgb)
#
#dpi = 100; fig = plt.figure(frameon=False,figsize=(w/dpi,h/dpi),dpi=dpi)
#ax = plt.Axes(fig,[0., 0., 1., 1.]); ax.set_axis_off(); fig.add_axes(ax)
#plt.imshow(rgb);
#plt.imsave('test01.png', rgb)

i_daynum = int(daynum)
product = 'L30'
# Try days sequentially until we find one where there's a Landsat image
# for this tile.
while True:
    daynum = str(i_daynum).zfill(3)
    prefix = product + '/HLS.' + product + '.T' + tile_id + '.' + year + daynum
    print('Finding tiles with prefix {}'.format(prefix))
    matches = list_available_tiles(prefix)

    if len(matches) == 0:
        print('No matching tiles')
        i_daynum += 1
    else:
        break

blob_name = matches[0]
landsat_url = hls_blob_root + '/' + blob_name
print('Found {} matching tiles, e.g.:\n{}'.format(len(matches), landsat_url))

# Display a composite of the Landsat image we found.
urls = [landsat_url.replace(sband, '_' + str(b).zfill(2)) for b in composite_bands]
print('Rendering URLs:')
for s in urls:
    print(s)

image_data = []
for fn in urls:
    with rasterio.open(fn, 'r') as raster:
        h = int(raster.height / composite_downsample_factor)
        w = int(raster.width / composite_downsample_factor)
        band_array = raster.read(1, out_shape=(1, h, w))
    # NOTE(review): dropped a redundant raster.close() -- the `with` block
    # already closes the dataset.
    band_array = band_array / composite_norm_value
    image_data.append(band_array)

rgb1 = np.dstack((image_data[0], image_data[1], image_data[2]))
np.clip(rgb1, 0, 1, rgb1)

dpi = 100; fig = plt.figure(frameon=False, figsize=(w/dpi, h/dpi), dpi=dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.]); ax.set_axis_off(); fig.add_axes(ax)
#plt.imshow(rgb1);
#plt.show()
plt.imsave('test.png', rgb1)

# ===========================================================================
# Patch continues: azure-landsat/setup.sh (index 85b49e7..2af4a10 100755).
# Context lines: pip3 install matplotlib / pandas / rasterio / azure.storage.blob
# Added shell lines (reproduced here as comments, with two fixes noted):
#
#   pip3 install pyproj
#   pip3 install python-cmr
#   pip3 install lxml
#
#   if [ ! -d ./tokens ]; then
#       mkdir -p ./tokens
#   fi
#
#   # NOTE(review): the original was
#   #   key=$(curl -s .../landsat-c2 | cut -d '"' -f 8 > tokens/file)
#   #   echo $key
#   # which is doubly broken: the command substitution captures nothing
#   # (stdout is redirected into the file), so `echo $key` prints an empty
#   # line -- and it writes to tokens/file while landsat-c2.py reads
#   # ./tokens/landsat_ro_sas.txt.  Fixed version:
#   curl -s https://planetarycomputer.microsoft.com/api/sas/v1/token/landsateuwest/landsat-c2 \
#     | cut -d '"' -f 8 > tokens/landsat_ro_sas.txt
#   cat tokens/landsat_ro_sas.txt
# ===========================================================================