Windowed Reading with EOReader#

Let’s see how to read a window instead of the whole bands of a given product and the potential time-consumption gain.

Warning: The durations shown hereunder may not be representative of your computer’s performances. Please take it as a hint about relative performances between constellations.

Try with Landsat-8#

Let’s open a Landsat-8 OLI collection 2 tile.

# Imports
import os
import geopandas as gpd
import rasterio
from shapely.geometry import box

from rasterio.windows import Window, from_bounds
from eoreader.reader import Reader
from eoreader.bands import NDWI, SWIR_1

import hvplot.pandas
import hvplot.xarray

hvplot.extension('matplotlib')
/home/docs/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/dask/dataframe/__init__.py:42: FutureWarning: 
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

  warnings.warn(msg, FutureWarning)
# Open the product
folder = os.path.join("/home", "ds2_db3", "CI", "eoreader")
path = os.path.join(folder, "optical", "LC08_L1TP_200030_20201220_20210310_02_T1.tar")
reader = Reader()
prod = reader.open(path)
band = NDWI
There is no existing products in EOReader corresponding to /home/ds2_db3/CI/eoreader/optical/LC08_L1TP_200030_20201220_20210310_02_T1.tar.

Windows in EOReader#

There are several ways to read a band with a window in EOReader:

  • Using a vector stored on disk

  • Using a geopandas.GeoDataFrame

  • Using a tuple containing the bounds (in the same CRS than the bands, i.e UTM accessible through prod.crs())

  • Using directly a window. Note that if you want to pass pixel bounds, you must first create a rasterio.windows.Window

⚠ Even if a vector is passed to the window keyword, only its extent will be used.

# Here we have a window as a path to a vector
window_path = os.path.join(folder, "others", "20201220T104856_L8_200030_OLI_TIRS_window.geojson")
window_path
'/home/ds2_db3/CI/eoreader/others/20201220T104856_L8_200030_OLI_TIRS_window.geojson'
# Here we have the vector directly opened as a geopanas.GeoDataFrame
window_gdf = gpd.read_file(window_path)
window_gdf.to_crs("EPSG:4326").hvplot(coastline="10m")
---------------------------------------------------------------------------
DataSourceError                           Traceback (most recent call last)
Cell In[4], line 2
      1 # Here we have the vector directly opened as a geopanas.GeoDataFrame
----> 2 window_gdf = gpd.read_file(window_path)
      3 window_gdf.to_crs("EPSG:4326").hvplot(coastline="10m")

File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/geopandas/io/file.py:294, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
    291             from_bytes = True
    293 if engine == "pyogrio":
--> 294     return _read_file_pyogrio(
    295         filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
    296     )
    298 elif engine == "fiona":
    299     if pd.api.types.is_file_like(filename):

File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/geopandas/io/file.py:547, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
    538     warnings.warn(
    539         "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
    540         "will be removed in a future release. You can use the 'columns' keyword "
   (...)
    543         stacklevel=3,
    544     )
    545     kwargs["columns"] = kwargs.pop("include_fields")
--> 547 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)

File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/pyogrio/geopandas.py:265, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
    260 if not use_arrow:
    261     # For arrow, datetimes are read as is.
    262     # For numpy IO, datetimes are read as string values to preserve timezone info
    263     # as numpy does not directly support timezones.
    264     kwargs["datetime_as_string"] = True
--> 265 result = read_func(
    266     path_or_buffer,
    267     layer=layer,
    268     encoding=encoding,
    269     columns=columns,
    270     read_geometry=read_geometry,
    271     force_2d=gdal_force_2d,
    272     skip_features=skip_features,
    273     max_features=max_features,
    274     where=where,
    275     bbox=bbox,
    276     mask=mask,
    277     fids=fids,
    278     sql=sql,
    279     sql_dialect=sql_dialect,
    280     return_fids=fid_as_index,
    281     **kwargs,
    282 )
    284 if use_arrow:
    285     meta, table = result

File ~/checkouts/readthedocs.org/user_builds/eoreader/envs/latest/lib/python3.9/site-packages/pyogrio/raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
     59 """Read OGR data source into numpy arrays.
     60 
     61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
   (...)
    194 
    195 """
    196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 198 return ogr_read(
    199     get_vsi_path_or_buffer(path_or_buffer),
    200     layer=layer,
    201     encoding=encoding,
    202     columns=columns,
    203     read_geometry=read_geometry,
    204     force_2d=force_2d,
    205     skip_features=skip_features,
    206     max_features=max_features or 0,
    207     where=where,
    208     bbox=bbox,
    209     mask=_mask_to_wkb(mask),
    210     fids=fids,
    211     sql=sql,
    212     sql_dialect=sql_dialect,
    213     return_fids=return_fids,
    214     dataset_kwargs=dataset_kwargs,
    215     datetime_as_string=datetime_as_string,
    216 )

File pyogrio/_io.pyx:1240, in pyogrio._io.ogr_read()

File pyogrio/_io.pyx:220, in pyogrio._io.ogr_open()

DataSourceError: /home/ds2_db3/CI/eoreader/others/20201220T104856_L8_200030_OLI_TIRS_window.geojson: No such file or directory
# Here we have an array of the bounds (tuple or list also works)
window_bounds = window_gdf.bounds.values[0]
window_bounds
array([ 579046.01517122, 4765387.52325064,  678660.50985682,
       4834693.77806381])
# Here we construct a Window containing directly the pixels 
# It is not necessary to have a window with integers values, they will be rounded when needed.
# However, it is better to control exactly the pixels you want so it is advised to round the values yourself
with rasterio.open(str(prod.get_band_paths([SWIR_1])[SWIR_1])) as ds:
    window_pix = from_bounds(*window_bounds, ds.transform)
window_pix
Window(col_off=1932.033839040505, row_off=2154.040731206187, width=3320.4831561867395, height=2310.2084937724576)
# Create function to time it easily
def load(prod, window):
    """
    Function that loads the wanted band over a proposed window and cleans the temporary directory.    
    """
    ds = prod.load(
        band,
        window=window
    )
    prod.clean_tmp()
    return ds[band]

Time without window#

Pass None to load without any window

%time band_arr = load(prod, None)
CPU times: user 1min 12s, sys: 3min 54s, total: 5min 6s
Wall time: 8min 26s

Time with window#

Use directly the window path. The keyword used to load with a window is window.

%time band_arr_win_path = load(prod, window_path)
CPU times: user 4.13 s, sys: 0 ns, total: 4.13 s
Wall time: 7.37 s

Read with a window in different ways#

Here, try to load with:

  • geopandas.GeoDataFrame

  • Array of bounds

  • rasterio.windows.Window

%time band_arr_win_gdf = load(prod, window_gdf)
CPU times: user 7.89 s, sys: 0 ns, total: 7.89 s
Wall time: 5.54 s
%time band_arr_win_bounds = load(prod, window_bounds)
CPU times: user 4.24 s, sys: 0 ns, total: 4.24 s
Wall time: 4.04 s
%time band_arr_win_pix = load(prod, window_pix)
CPU times: user 3.81 s, sys: 0 ns, total: 3.81 s
Wall time: 3.3 s

Some checks#

✅ All windowed bands have the same shape
✅ Windowed bands are smaller that raw band

assert band_arr_win_path.shape == band_arr_win_gdf.shape == band_arr_win_bounds.shape == band_arr_win_pix.shape
band_arr_win_path.shape
(1, 2311, 3321)
band_arr.shape
(1, 7811, 7681)

Plot#

  • The image with the [blue - white - yellow] colomap is the MNDWI band.

  • The image with the [green - blue] colomap is the windowed MNDWI band.

  • The green line corresponds to the window’s bounds

  • The blue line is the given vector used as a window

%%opts QuadMesh [fig_size=500]

# Compute bounds
bounds = band_arr.rio.bounds()
window_bounds_gpd = gpd.GeoDataFrame(
    geometry=[box(*window_bounds)],
    crs=window_gdf.crs
)

# Plot
band_arr[0, ::10, ::10].hvplot.quadmesh(
    "x", "y", 
    coastline="10m",
    cmap="bwy_r"
) * band_arr_win_path[0, ::10, ::10].hvplot.quadmesh(
    "x", "y", 
    coastline="10m",
    cmap="GnBu"
) * window_bounds_gpd.hvplot(
    facecolor=(0, 0, 0, 0), 
    edgecolor="g", linewidth=4
) * window_gdf.hvplot(
    facecolor=(0, 0, 0, 0), 
    edgecolor="b", linewidth=4, 
    xlim=(bounds[0], bounds[2]), 
    ylim=(bounds[1], bounds[3])
)