Windowed Reading with EOReader#
Let’s see how to read a window instead of the whole bands of a given product and the potential time-consumption gain.
Warning: The durations shown hereunder may not be representative of your computer’s performances. Please take it as a hint about relative performances between constellations.
Try with Landsat-8#
Let’s open a Landsat-8 OLI collection 2 tile.
# Imports
import os
import geopandas as gpd
import rasterio
from shapely.geometry import box
from rasterio.windows import Window, from_bounds
from eoreader.reader import Reader
from eoreader.bands import NDWI, SWIR_1
import hvplot.pandas
import hvplot.xarray
hvplot.extension('matplotlib')
/home/docs/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/dask/dataframe/__init__.py:42: FutureWarning:
Dask dataframe query planning is disabled because dask-expr is not installed.
You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.
warnings.warn(msg, FutureWarning)
# Open the product
folder = os.path.join("/home", "ds2_db3", "CI", "eoreader")
path = os.path.join(folder, "optical", "LC08_L1TP_200030_20201220_20210310_02_T1.tar")
reader = Reader()
prod = reader.open(path)
band = NDWI
There is no existing products in EOReader corresponding to /home/ds2_db3/CI/eoreader/optical/LC08_L1TP_200030_20201220_20210310_02_T1.tar.
Windows in EOReader#
There are several ways to read a band with a window in EOReader:
Using a vector stored on disk
Using a
geopandas.GeoDataFrame
Using a tuple containing the bounds (in the same CRS than the bands, i.e UTM accessible through
prod.crs()
)Using directly a window. Note that if you want to pass pixel bounds, you must first create a
rasterio.windows.Window
⚠ Even if a vector is passed to the window keyword, only its extent will be used.
# Here we have a window as a path to a vector
window_path = os.path.join(folder, "others", "20201220T104856_L8_200030_OLI_TIRS_window.geojson")
window_path
'/home/ds2_db3/CI/eoreader/others/20201220T104856_L8_200030_OLI_TIRS_window.geojson'
# Here we have the vector directly opened as a geopanas.GeoDataFrame
window_gdf = gpd.read_file(window_path)
window_gdf.to_crs("EPSG:4326").hvplot(coastline="10m")
---------------------------------------------------------------------------
DataSourceError Traceback (most recent call last)
Cell In[4], line 2
1 # Here we have the vector directly opened as a geopanas.GeoDataFrame
----> 2 window_gdf = gpd.read_file(window_path)
3 window_gdf.to_crs("EPSG:4326").hvplot(coastline="10m")
File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/geopandas/io/file.py:294, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
291 from_bytes = True
293 if engine == "pyogrio":
--> 294 return _read_file_pyogrio(
295 filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
296 )
298 elif engine == "fiona":
299 if pd.api.types.is_file_like(filename):
File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/geopandas/io/file.py:547, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
538 warnings.warn(
539 "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
540 "will be removed in a future release. You can use the 'columns' keyword "
(...)
543 stacklevel=3,
544 )
545 kwargs["columns"] = kwargs.pop("include_fields")
--> 547 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/pyogrio/geopandas.py:265, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
260 if not use_arrow:
261 # For arrow, datetimes are read as is.
262 # For numpy IO, datetimes are read as string values to preserve timezone info
263 # as numpy does not directly support timezones.
264 kwargs["datetime_as_string"] = True
--> 265 result = read_func(
266 path_or_buffer,
267 layer=layer,
268 encoding=encoding,
269 columns=columns,
270 read_geometry=read_geometry,
271 force_2d=gdal_force_2d,
272 skip_features=skip_features,
273 max_features=max_features,
274 where=where,
275 bbox=bbox,
276 mask=mask,
277 fids=fids,
278 sql=sql,
279 sql_dialect=sql_dialect,
280 return_fids=fid_as_index,
281 **kwargs,
282 )
284 if use_arrow:
285 meta, table = result
File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/pyogrio/raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
59 """Read OGR data source into numpy arrays.
60
61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
(...)
194
195 """
196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 198 return ogr_read(
199 get_vsi_path_or_buffer(path_or_buffer),
200 layer=layer,
201 encoding=encoding,
202 columns=columns,
203 read_geometry=read_geometry,
204 force_2d=force_2d,
205 skip_features=skip_features,
206 max_features=max_features or 0,
207 where=where,
208 bbox=bbox,
209 mask=_mask_to_wkb(mask),
210 fids=fids,
211 sql=sql,
212 sql_dialect=sql_dialect,
213 return_fids=return_fids,
214 dataset_kwargs=dataset_kwargs,
215 datetime_as_string=datetime_as_string,
216 )
File pyogrio/_io.pyx:1240, in pyogrio._io.ogr_read()
File pyogrio/_io.pyx:220, in pyogrio._io.ogr_open()
DataSourceError: /home/ds2_db3/CI/eoreader/others/20201220T104856_L8_200030_OLI_TIRS_window.geojson: No such file or directory
# Here we have an array of the bounds (tuple or list also works)
window_bounds = window_gdf.bounds.values[0]
window_bounds
array([ 579046.01517122, 4765387.52325064, 678660.50985682,
4834693.77806381])
# Here we construct a Window containing directly the pixels
# It is not necessary to have a window with integers values, they will be rounded when needed.
# However, it is better to control exactly the pixels you want so it is advised to round the values yourself
with rasterio.open(str(prod.get_band_paths([SWIR_1])[SWIR_1])) as ds:
window_pix = from_bounds(*window_bounds, ds.transform)
window_pix
Window(col_off=1932.033839040505, row_off=2154.040731206187, width=3320.4831561867395, height=2310.2084937724576)
# Create function to time it easily
def load(prod, window):
"""
Function that loads the wanted band over a proposed window and cleans the temporary directory.
"""
ds = prod.load(
band,
window=window
)
prod.clean_tmp()
return ds[band]
Time without window#
Pass None
to load without any window
%time band_arr = load(prod, None)
CPU times: user 1min 12s, sys: 3min 54s, total: 5min 6s
Wall time: 8min 26s
Time with window#
Use directly the window path.
The keyword used to load with a window is window
.
%time band_arr_win_path = load(prod, window_path)
CPU times: user 4.13 s, sys: 0 ns, total: 4.13 s
Wall time: 7.37 s
Read with a window in different ways#
Here, try to load with:
geopandas.GeoDataFrame
Array of bounds
rasterio.windows.Window
%time band_arr_win_gdf = load(prod, window_gdf)
CPU times: user 7.89 s, sys: 0 ns, total: 7.89 s
Wall time: 5.54 s
%time band_arr_win_bounds = load(prod, window_bounds)
CPU times: user 4.24 s, sys: 0 ns, total: 4.24 s
Wall time: 4.04 s
%time band_arr_win_pix = load(prod, window_pix)
CPU times: user 3.81 s, sys: 0 ns, total: 3.81 s
Wall time: 3.3 s
Some checks#
✅ All windowed bands have the same shape
✅ Windowed bands are smaller that raw band
assert band_arr_win_path.shape == band_arr_win_gdf.shape == band_arr_win_bounds.shape == band_arr_win_pix.shape
band_arr_win_path.shape
(1, 2311, 3321)
band_arr.shape
(1, 7811, 7681)
Plot#
The image with the
[blue - white - yellow]
colomap is the MNDWI band.The image with the
[green - blue]
colomap is the windowed MNDWI band.The green line corresponds to the window’s bounds
The blue line is the given vector used as a window
%%opts QuadMesh [fig_size=500]
# Compute bounds
bounds = band_arr.rio.bounds()
window_bounds_gpd = gpd.GeoDataFrame(
geometry=[box(*window_bounds)],
crs=window_gdf.crs
)
# Plot
band_arr[0, ::10, ::10].hvplot.quadmesh(
"x", "y",
coastline="10m",
cmap="bwy_r"
) * band_arr_win_path[0, ::10, ::10].hvplot.quadmesh(
"x", "y",
coastline="10m",
cmap="GnBu"
) * window_bounds_gpd.hvplot(
facecolor=(0, 0, 0, 0),
edgecolor="g", linewidth=4
) * window_gdf.hvplot(
facecolor=(0, 0, 0, 0),
edgecolor="b", linewidth=4,
xlim=(bounds[0], bounds[2]),
ylim=(bounds[1], bounds[3])
)