Raster Data
DSE EcoTech Series (2024)
license: CC-BY-4.0
IMPORTS¶
from typing import Any, Union
import sys
from pathlib import Path
from pprint import pprint
import rich.pretty as rp
import matplotlib.pyplot as plt
from io import BytesIO
from urllib.request import urlopen
import numpy as np
import xarray as xr
import rasterio as rio
import rioxarray as rxr
import localtileserver
import leafmap
CONSTANTS¶
DATA_FOLDER = '../data/raster'
S2_ZARR_PATH = f'{DATA_FOLDER}/sentinel_2.zarr'
PATHS = [p for p in Path(DATA_FOLDER).glob(f'*.tif')]
READING DATA¶
RASTERIO¶
rasterio is a great package for working with GIS data. Here we’ll simply be using it to read in raster data.
The read_rio(...)
method below returns a numpy array (arr
) containing the pixel-values and a dictionary
containing geospatial and other meta-data (profile
)
print('GEOTIFFS:')
for i, p in enumerate(PATHS):
print(i, p.name)
GEOTIFFS:
0 tahoe-dem.tif
1 tahoe-mean-s2-20210101_20210301.tif
def visualize_arrays(s2_arr: np.ndarray, dem_arr: np.ndarray) -> None:
""" visualize sentinel-2 and dem data
1. compute an S2 RGB array uint8 array for visualization
2. select the elevation band of the DEM array
"""
s2_rgb =(s2_arr[[4,3,2]].swapaxes(0,1).swapaxes(1,2) / 10000)
s2_rgb = ((s2_rgb - 0.0) * 255 / 0.5).clip(0,255).round().astype(int)
elevation = dem_arr[0]
s2_rgb.shape, elevation.shape
fig, axs = plt.subplots(1, 2, figsize=(14, 6))
plt.suptitle("Sentinel-2 and Elevation data from Tahoe Region", fontsize=24, color='#333', fontweight='light')
axs[0].axis('off')
axs[1].axis('off')
axs[0].set_title('DEM', fontsize=13, color='#666')
axs[0].imshow(elevation, cmap="hot")
axs[1].set_title('S2 (mean 20210101-20210301)', fontsize=13, color='#666')
axs[1].imshow(s2_rgb)
plt.show()
def print_raster_info(name, arr: np.ndarray, profile: dict[str, Any]) -> None:
""" prints out information about the raster """
print('-' * 25)
print(f'{name}')
print('-' * 25)
print('shape:', arr.shape)
print('mean:', np.nanmean(arr))
print('max:', np.nanmax(arr))
print('min:', np.nanmin(arr))
print('-' * 10)
for k, v in profile.items():
print(f'{k}:', v)
def line():
print('-' * 75)
def read_rio(path: str) -> tuple[np.ndarray, dict[str, Any]]:
""" read a geotiff using rasterio """
with rio.open(path,'r') as src:
profile = src.profile
arr = src.read()
return arr, profile
dem_arr, dem_profile = read_rio(PATHS[0])
s2_arr, s2_profile = read_rio(PATHS[1])
line()
print('Sentinel-2:', s2_arr.shape)
rp.pprint(s2_profile)
line()
visualize_arrays(s2_arr, dem_arr)
One can also use GDAL from the command line to see the geospatial data
!gdalinfo ../data/raster/tahoe-dem.tif
Driver: GTiff/GeoTIFF
Files: ../data/raster/tahoe-dem.tif
Size is 4118, 3756
Coordinate System is:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
Data axis to CRS axis mapping: 2,1
Origin = (-120.845683933121819,39.653702766189156)
Pixel Size = (0.000269494585236,-0.000269494585236)
Metadata:
AREA_OR_POINT=Area
Image Structure Metadata:
COMPRESSION=LZW
INTERLEAVE=PIXEL
Corner Coordinates:
Upper Left (-120.8456839, 39.6537028) (120d50'44.46"W, 39d39'13.33"N)
Lower Left (-120.8456839, 38.6414811) (120d50'44.46"W, 38d38'29.33"N)
Upper Right (-119.7359052, 39.6537028) (119d44' 9.26"W, 39d39'13.33"N)
Lower Right (-119.7359052, 38.6414811) (119d44' 9.26"W, 38d38'29.33"N)
Center (-120.2907946, 39.1475919) (120d17'26.86"W, 39d 8'51.33"N)
Band 1 Block=256x256 Type=Float32, ColorInterp=Gray
Description = DEM
Band 2 Block=256x256 Type=Float32, ColorInterp=Undefined
Description = slope
Band 3 Block=256x256 Type=Float32, ColorInterp=Undefined
Description = aspect
VISUALIZE THE DATA¶
We can now visualize these images. First I’ll compute an RGB array uint8 array, and select the elevation band from the dem
Note I’m swapping axes in order to use matplotlib for plotting
XARRAY¶
RIOXARRY¶
rioxarray allows one to read the tif directly into an xarray format
NOTE: the reading of the data happens lazily when you call ds.data
da = rxr.open_rasterio(PATHS[1])
print('array equality:', np.array_equal(da.data, s2_arr, equal_nan=True))
da
There a few advantages using XARRAY:
- we now have access to the lat, lon values
ds.x,y
- we also have access to the band-names
ds.long_name
whose order corresponds with the returned numpy array
When working with xarray data the zarr data-type is a good option. We’ll first convert the data-array to a dataset and then save it
ds = da.to_dataset(name='s2')
ds
perhaps it would be nicer to have the data organized by band names
def bandify_data_array(da: xr.DataArray, bands: Union[list[str], str]='long_name') -> xr.Dataset:
""" transform data-array to dataset with band-valued data_vars """
if isinstance(bands, str):
bands = list(da.attrs[bands])
return xr.Dataset(
data_vars={b: (['y', 'x'], da.data[i]) for i, b in enumerate(bands)},
coords=dict(
x=("x", da.x.data),
y=("y", da.y.data)
),
attrs=da.attrs)
ds = bandify_data_array(da)
(ds.B8-ds.B4).plot()
plt.title('NDVI')
plt.show()
ds
if Path(S2_ZARR_PATH).exists():
print(f'[INFO]: zarr-output exists: to re-run first delete zarr-directory ({S2_ZARR_PATH})')
else:
ds.to_zarr(S2_ZARR_PATH)
[INFO]: zarr-output exists: to re-run first delete zarr-directory (../data/raster/sentinel_2.zarr)
COG¶
!rio cogeo create ../data/raster/tahoe-mean-s2-20210101_20210301.tif ../data/raster/cogs/tahoe-mean-s2-20210101_20210301.tif
Reading input: /Users/brookieguzder-williams/code/dse/map_something/repo/data/raster/tahoe-mean-s2-20210101_20210301.tif
[####################################] 100%
Adding overviews...
Updating dataset tags...
Writing output to: ../data/raster/cogs/tahoe-mean-s2-20210101_20210301.tif
!rio cogeo create ../data/raster/tahoe-dem.tif ../data/raster/cogs/tahoe-dem.tif
Reading input: /Users/brookieguzder-williams/code/dse/map_something/repo/data/raster/tahoe-dem.tif
[####################################] 100%
Adding overviews...
Updating dataset tags...
Writing output to: ../data/raster/cogs/tahoe-dem.tif
!gdalinfo ../data/raster/cogs/tahoe-mean-s2-20210101_20210301.tif
Driver: GTiff/GeoTIFF
Files: ../data/raster/cogs/tahoe-mean-s2-20210101_20210301.tif
Size is 4118, 3756
Coordinate System is:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
Data axis to CRS axis mapping: 2,1
Origin = (-120.845683933121819,39.653702766189156)
Pixel Size = (0.000269494585236,-0.000269494585236)
Metadata:
OVR_RESAMPLING_ALG=NEAREST
AREA_OR_POINT=Area
Image Structure Metadata:
LAYOUT=COG
COMPRESSION=DEFLATE
INTERLEAVE=PIXEL
Corner Coordinates:
Upper Left (-120.8456839, 39.6537028) (120d50'44.46"W, 39d39'13.33"N)
Lower Left (-120.8456839, 38.6414811) (120d50'44.46"W, 38d38'29.33"N)
Upper Right (-119.7359052, 39.6537028) (119d44' 9.26"W, 39d39'13.33"N)
Lower Right (-119.7359052, 38.6414811) (119d44' 9.26"W, 38d38'29.33"N)
Center (-120.2907946, 39.1475919) (120d17'26.86"W, 39d 8'51.33"N)
Band 1 Block=512x512 Type=Float64, ColorInterp=Gray
Description = B1
Overviews: 2059x1878, 1030x939, 515x470
Band 2 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B2
Overviews: 2059x1878, 1030x939, 515x470
Band 3 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B3
Overviews: 2059x1878, 1030x939, 515x470
Band 4 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B4
Overviews: 2059x1878, 1030x939, 515x470
Band 5 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B5
Overviews: 2059x1878, 1030x939, 515x470
Band 6 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B6
Overviews: 2059x1878, 1030x939, 515x470
Band 7 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B7
Overviews: 2059x1878, 1030x939, 515x470
Band 8 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B8
Overviews: 2059x1878, 1030x939, 515x470
Band 9 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B8A
Overviews: 2059x1878, 1030x939, 515x470
Band 10 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B9
Overviews: 2059x1878, 1030x939, 515x470
Band 11 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B11
Overviews: 2059x1878, 1030x939, 515x470
Band 12 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = B12
Overviews: 2059x1878, 1030x939, 515x470
Band 13 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = AOT
Overviews: 2059x1878, 1030x939, 515x470
Band 14 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = WVP
Overviews: 2059x1878, 1030x939, 515x470
Band 15 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = SCL
Overviews: 2059x1878, 1030x939, 515x470
Band 16 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = TCI_R
Overviews: 2059x1878, 1030x939, 515x470
Band 17 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = TCI_G
Overviews: 2059x1878, 1030x939, 515x470
Band 18 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = TCI_B
Overviews: 2059x1878, 1030x939, 515x470
Band 19 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = MSK_CLDPRB
Overviews: 2059x1878, 1030x939, 515x470
Band 20 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = MSK_SNWPRB
Overviews: 2059x1878, 1030x939, 515x470
Band 21 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = QA10
Overviews: 2059x1878, 1030x939, 515x470
Band 22 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = QA20
Overviews: 2059x1878, 1030x939, 515x470
Band 23 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = QA60
Overviews: 2059x1878, 1030x939, 515x470
Band 24 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = MSK_CLASSI_OPAQUE
Overviews: 2059x1878, 1030x939, 515x470
Band 25 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = MSK_CLASSI_CIRRUS
Overviews: 2059x1878, 1030x939, 515x470
Band 26 Block=512x512 Type=Float64, ColorInterp=Undefined
Description = MSK_CLASSI_SNOW_ICE
Overviews: 2059x1878, 1030x939, 515x470
cog, cog_profile = read_rio('../data/raster/cogs/tahoe-mean-s2-20210101_20210301.tif')
print(cog.shape)
rp.pprint(cog_profile)
rp.pprint(s2_profile)