Search Shortcut cmd + k | ctrl + k
raster

DuckDB extension for reading and writing geospatial raster data using SQL.

Maintainer(s): ahuarte47

Installing and Loading

INSTALL raster FROM community;
LOAD raster;

Example

-- List available GDAL raster drivers and formats:
SELECT short_name, long_name, help_url FROM RT_Drivers();

┌────────────────┬──────────────────────────────────────────────────────────┬─────────────────────────────────────────────────────┐
   short_name                           long_name                                               help_url                       
    varchar                              varchar                                                 varchar                       
├────────────────┼──────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────┤
 VRT             Virtual Raster                                            https://gdal.org/drivers/raster/vrt.html            
 GTiff           GeoTIFF                                                   https://gdal.org/drivers/raster/gtiff.html          
 COG             Cloud optimized GeoTIFF generator                         https://gdal.org/drivers/raster/cog.html            
  ·                       ·                                                                   ·                                
  ·                       ·                                                                   ·                                
  ·                       ·                                                                   ·                                
 ENVI            ENVI .hdr Labelled                                        https://gdal.org/drivers/raster/envi.html           
 Zarr            Zarr                                                      NULL                                                
 HTTP            HTTP Fetching Wrapper                                     NULL                                                
└────────────────┴──────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘

-- Read a raster file — one row per tile, one datacube column per band:
SELECT * FROM RT_Read('path/to/raster/file.tif');

┌───────┬───────────┬────────────┬────────────────────────────────┬─────────────────────────┬───────┬────────┬────────┬───────┬───────┬────────────┬────────────┐
  id        x          y                    bbox                      geometry          level  tile_x  tile_y  cols   rows    metadata   databand_1 
 int64   double      double    struct(xmin, ymin, xmax, ymax)  geometry('epsg:25830')   int32  int32   int32   int32  int32     JSON        BLOB    
├───────┼───────────┼────────────┼────────────────────────────────┼─────────────────────────┼───────┼────────┼────────┼───────┼───────┼────────────┼────────────┤
     0  545619.75  4724508.25  {xmin: 545539.75, ...}          POLYGON ((...))              0       0       0    320      8  {...}       ...        
     1  545619.75  4724504.25  {xmin: 545539.75, ...}          POLYGON ((...))              0       0       1    320      8  {...}       ...        
└───────┴───────────┴────────────┴────────────────────────────────┴─────────────────────────┴───────┴────────┴────────┴───────┴───────┴────────────┴────────────┘

-- Compute NDVI directly in SQL using band algebra:
WITH __input AS (
    SELECT
        databand_1 AS red,
        databand_2 AS nir
    FROM
        RT_Read('path/to/raster/file.tif', blocksize_x := 512, blocksize_y := 512)
)
SELECT
    RT_Cube2TypeFloat((nir - red) / (nir + red)) AS ndvi
FROM
    __input
;

-- Extract pixel values into SQL arrays for aggregate analysis:
WITH __input AS (
    SELECT RT_Cube2ArrayFloat(databand_1, true) AS band
    FROM RT_Read('path/to/raster/file.tif', blocksize_x := 512, blocksize_y := 512)
)
SELECT
    list_min(band.values)        AS band_min,
    list_stddev_pop(band.values) AS band_stddev,
    list_max(band.values)        AS band_max
FROM __input
;

-- Write a raster file using COPY ... FORMAT RASTER:
COPY (
    SELECT geometry, databand_1, databand_2, databand_3
    FROM RT_Read('path/to/raster/file.tif')
)
TO 'path/to/output/file.tif'
WITH (
    FORMAT RASTER,
    DRIVER 'COG',
    CREATION_OPTIONS ('COMPRESS=LZW'),
    RESAMPLING 'nearest',
    COMPUTE_VALID_ENVELOPE true,
    SRS 'EPSG:25830',
    GEOMETRY_COLUMN 'geometry',
    DATABAND_COLUMNS ['databand_3', 'databand_2', 'databand_1']
);

-- Clip raster tiles to a polygon geometry and write to a new file:
LOAD spatial;
LOAD json;

COPY (
    WITH __clip_layer AS (
        SELECT geom FROM ST_Read('./test/data/CATAST_Pol_Township-PNA.geojson')
    ),
    __dataset AS (
        SELECT id, x, y, tile_x, tile_y, cols, rows, geometry, databand_1, metadata
        FROM
            __clip_layer,
            RT_Read([
                './test/data/mosaic/SCL.tif-land-clip00.tiff',
                './test/data/mosaic/SCL.tif-land-clip01.tiff',
                './test/data/mosaic/SCL.tif-land-clip10.tiff',
                './test/data/mosaic/SCL.tif-land-clip11.tiff'
            ])
        WHERE ST_Intersects(__clip_layer.geom, geometry)
    ),
    __clipped AS (
        SELECT
            RT_CubeClip(databand_1,
                        tile_x,
                        tile_y,
                       (metadata->'blocksize_x')::INTEGER,
                       (metadata->'blocksize_y')::INTEGER,
                       (metadata->'transform')::DOUBLE[],
                        __clip_layer.geom,
                       (metadata->'bands'->0->'nodata')::DOUBLE
            ) AS databand,
            * EXCLUDE(databand_1)
        FROM __clip_layer, __dataset
    )
    SELECT geometry, databand FROM __clipped
)
TO 'path/to/output/clipped.tiff'
WITH (
    FORMAT RASTER,
    DRIVER 'GTiff',
    CREATION_OPTIONS ('COMPRESS=LZW'),
    RESAMPLING 'nearest',
    SRS 'EPSG:32630',
    GEOMETRY_COLUMN 'geometry',
    DATABAND_COLUMNS ['databand']
);

About raster

Extension for DuckDB for reading and writing raster files using SQL.

The main purpose with this extension is to be able to read raster files as tables in DuckDB, and then perform band algebra operations on the raster data using SQL. A raster is just a datacube, and the idea is to be able to manipulate it as such.

The extension uses the GDAL library to read raster files, and it supports a wide range of raster formats.

The RT_Read function is the main function to read raster files, extract metadata, and perform basic operations on the raster data before returning a geoparquet-like DuckDB table. The function takes the path to the raster file as an argument, and other optional parameters, and returns a table with the following columns:

  • id: A unique identifier for each tile.
  • x: The center x coordinate of the tile.
  • y: The center y coordinate of the tile.
  • bbox: The bounding box of the tile, represented as a struct with the following fields: xmin, ymin, xmax, ymax.
  • geometry: The geometry of the tile, represented as a polygon.
  • level: The zoom level of the tile.
  • tile_x: The x coordinate of the tile in the zoom level.
  • tile_y: The y coordinate of the tile in the zoom level.
  • cols: The number of columns in the tile.
  • rows: The number of rows in the tile.
  • metadata: A JSON object with the metadata of the tile.
  • databand_x: A BLOB with the data of the x-th band of the tile. The number of bands and their names can be obtained from the metadata column.

The RT_Read function provides a datacube option that allows to return a single datacube BLOB column with the N-dimensional array with the data of all the bands of the tile, instead of one BLOB per band.

RT_Read supports filter pushdown on the non-BLOB columns, which allows you to prefilter the tiles that are loaded based on their metadata or spatial location. For example, you can filter the tiles that intersect with a certain geometry or that have a certain value in the metadata.

COPY TO with FORMAT RASTER allows to export a table with the same structure as the one returned by RT_Read to a new raster file. You can specify the driver and several creation options:

  • DRIVER: The GDAL driver to use for writing the raster file. For example, GTiff or COG.
  • CREATION_OPTIONS: A list of creation options to pass to the GDAL driver, specified as strings in the format KEY=VALUE.
  • RESAMPLING: The resampling method to use when writing the raster file. For example, nearest, bilinear, cubic, etc.
  • ENVELOPE: The spatial extent of the raster file, specified as a list of four values: [xmin, ymin, xmax, ymax].
  • SRS: The spatial reference system of the raster file, specified as an EPSG code (e.g. EPSG:4326) or a WKT string.
  • GEOMETRY_COLUMN: The name of the column that contains the geometry of the tiles. This column will be used to determine the spatial location and the resolution of the tiles in the output raster file.
  • DATABAND_COLUMNS: A list with the names of the columns that contain the data of the bands. The order of the columns in the list will determine the order of the bands in the output raster file.

Added Functions

function_name function_type description comment examples
RT_Drivers table Returns the list of supported GDAL raster drivers and file formats. NULL [SELECT * FROM RT_Drivers();]
RT_Read table Opens a raster file (or a mosaic of raster files) and returns a table with the raster data. Supports filter pushdown on all non-BLOB columns. NULL [SELECT * FROM RT_Read('path/to/raster/file.tif');]
RT_Array2Cube scalar Packages a plain SQL array of numeric values back into a datacube BLOB, the inverse of RT_Cube2Array. NULL [SELECT RT_Array2Cube(r.values, 'RAW', r.bands, r.cols, r.rows, r.no_data) FROM …;]
RT_CubeAbs scalar Returns a datacube with the absolute value of each cell. Nodata cells are preserved. NULL [SELECT RT_CubeAbs(databand_1) FROM …;]
RT_CubeAdd scalar Returns a datacube with each cell equal to the sum of the two inputs. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeAdd(databand_1, 10) FROM …;]
RT_CubeBurn scalar Returns a datacube where cells inside the given geometry are replaced by the specified value. Cells outside the geometry are preserved. Nodata cells are preserved. NULL [SELECT RT_CubeBurn(databand_1, tile_x, tile_y, blocksize_x, blocksize_y, geo_transform, ST_GeomFromText('POLYGON((…))'), 255.0) FROM …;]
RT_CubeClip scalar Returns a datacube where cells outside the given geometry are replaced by the specified value. Cells inside the geometry are preserved. Nodata cells are preserved. NULL [SELECT RT_CubeClip(databand_1, tile_x, tile_y, blocksize_x, blocksize_y, geo_transform, ST_GeomFromText('POLYGON((…))'), 0.0) FROM …;]
RT_CubeDivide scalar Returns a datacube with each cell equal to the left-hand cell divided by the right-hand cell. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeDivide(databand_1, 10) FROM …;]
RT_CubeEqual scalar Returns a datacube where each cell is 1 if left == right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeEqual(databand_1, 10) FROM …;]
RT_CubeExp scalar Returns a datacube with the exponential (e^x) of each cell. Nodata cells are preserved. NULL [SELECT RT_CubeExp(databand_1) FROM …;]
RT_CubeFill scalar Returns a datacube where all cells (including nodata) are unconditionally replaced by the right-hand value. NULL [SELECT RT_CubeFill(databand_1, 0) FROM …;]
RT_CubeGreater scalar Returns a datacube where each cell is 1 if left > right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeGreater(databand_1, 10) FROM …;]
RT_CubeGreaterEqual scalar Returns a datacube where each cell is 1 if left >= right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeGreaterEqual(databand_1, 10) FROM …;]
RT_CubeLess scalar Returns a datacube where each cell is 1 if left < right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeLess(databand_1, 10) FROM …;]
RT_CubeLessEqual scalar Returns a datacube where each cell is 1 if left <= right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeLessEqual(databand_1, 10) FROM …;]
RT_CubeLog scalar Returns a datacube with the natural logarithm of each cell. Nodata cells are preserved. NULL [SELECT RT_CubeLog(databand_1) FROM …;]
RT_CubeMax scalar Returns a datacube with each cell equal to the maximum of the two inputs. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeMax(databand_1, databand_2) FROM …;]
RT_CubeMin scalar Returns a datacube with each cell equal to the minimum of the two inputs. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeMin(databand_1, databand_2) FROM …;]
RT_CubeMod scalar Returns a datacube with each cell equal to the remainder of dividing the left-hand cell by the right-hand value. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeMod(databand_1, 3) FROM …;]
RT_CubeMultiply scalar Returns a datacube with each cell equal to the product of the two inputs. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeMultiply(databand_1, 10) FROM …;]
RT_CubeNeg scalar Returns a datacube with each cell negated (multiplied by -1). Nodata cells are preserved. NULL [SELECT RT_CubeNeg(databand_1) FROM …;]
RT_CubeNotEqual scalar Returns a datacube where each cell is 1 if left != right, 0 otherwise. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeNotEqual(databand_1, 10) FROM …;]
RT_CubeNullOrEmpty scalar Returns true if the datacube is NULL or contains only nodata cells, false otherwise. NULL [SELECT RT_CubeNullOrEmpty(databand_1) FROM …;]
RT_Polygon scalar Vectorizes a datacube by creating a polygon geometry for each contiguous region of non-nodata values. NULL [SELECT RT_Polygon(databand_1, tile_x, tile_y, blocksize_x, blocksize_y, geo_transform) FROM …;]
RT_CubePow scalar Returns a datacube with each cell raised to the power of the right-hand value. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubePow(databand_1, 2) FROM …;]
RT_CubeSet scalar Returns a datacube where valid cells are replaced by the right-hand value. Nodata cells in the source are preserved. NULL [SELECT RT_CubeSet(databand_1, 0) FROM …;]
RT_CubeSetNoData scalar Returns a datacube where nodata cells are replaced by the specified value, and sets this value as the new nodata sentinel. NULL [SELECT RT_CubeSetNoData(databand_1, -9999) FROM …]
RT_CubeSqrt scalar Returns a datacube with the square root of each cell. Nodata cells are preserved. NULL [SELECT RT_CubeSqrt(databand_1) FROM …;]
RT_CubeStats scalar Calculates statistics for a specific band (0-based index) of a datacube. Returns a STRUCT with minimum, maximum, mean, stddev, valid_count and nodata_count fields. NULL [SELECT RT_CubeStats(databand_1, 0) AS stats FROM RT_Read('path/to/raster/file.tif');]
RT_CubeSubtract scalar Returns a datacube with each cell equal to the left-hand cell minus the right-hand cell. Inputs can be two datacubes or a datacube and a scalar. Nodata cells are preserved. NULL [SELECT RT_CubeSubtract(databand_1, 10) FROM …;]
RT_Envelope scalar Computes the bounding box of the valid (non-nodata) cells in the input datacube and returns it as a geometry. NULL [SELECT RT_Envelope(databand_1, tile_x, tile_y, (metadata->'blocksize_x')::INTEGER, (metadata->'blocksize_y')::INTEGER, (metadata->'transform')::DOUBLE[]) FROM RT_Read('path/to/raster/file.tif');]
RT_GdalConfig scalar Sets a GDAL configuration option (equivalent to CPLSetConfigOption). Pass NULL as the value to unset the option. NULL [SELECT RT_GdalConfig('AWS_NO_SIGN_REQUEST', 'YES');]
RT_Cube2ArrayInt32 scalar NULL NULL NULL
RT_Cube2TypeUInt64 scalar NULL NULL NULL
RT_Cube2TypeInt8 scalar NULL NULL NULL
RT_Cube2ArrayFloat scalar NULL NULL NULL
RT_Cube2ArrayUInt32 scalar NULL NULL NULL
RT_Cube2ArrayDouble scalar NULL NULL NULL
RT_Cube2TypeUInt8 scalar NULL NULL NULL
RT_Cube2ArrayInt64 scalar NULL NULL NULL
RT_Cube2TypeUInt16 scalar NULL NULL NULL
RT_Cube2TypeDouble scalar NULL NULL NULL
RT_Cube2TypeInt32 scalar NULL NULL NULL
RT_Cube2TypeInt64 scalar NULL NULL NULL
RT_Cube2ArrayInt16 scalar NULL NULL NULL
RT_Cube2ArrayInt8 scalar NULL NULL NULL
RT_Cube2ArrayUInt8 scalar NULL NULL NULL
RT_Cube2TypeFloat scalar NULL NULL NULL
RT_Cube2TypeInt16 scalar NULL NULL NULL
RT_Cube2ArrayUInt16 scalar NULL NULL NULL
RT_Cube2TypeUInt32 scalar NULL NULL NULL
RT_Cube2ArrayUInt64 scalar NULL NULL NULL

Overloaded Functions

function_name function_type description comment examples
- scalar NULL NULL NULL
/ scalar NULL NULL NULL
+ scalar NULL NULL NULL
* scalar NULL NULL NULL
% scalar NULL NULL NULL
^ scalar NULL NULL NULL

Added Types

type_name type_size logical_type type_category internal
RT_BBOX 0 STRUCT COMPOSITE true
RT_DATACUBE 16 BLOB NULL true
RT_STATS 0 STRUCT COMPOSITE true

Added Settings

This extension does not add any settings.