Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions kart/point_cloud/import_.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
rewrite_and_merge_metadata,
is_copc,
)
from kart.point_cloud.pdal_convert import convert_tile_to_copc
from kart.point_cloud.pdal_convert import (
convert_tile_to_copc,
convert_tile_with_crs_override,
)
from kart.tile.importer import TileImporter
from kart.point_cloud.v1 import PointCloudV1

Expand Down Expand Up @@ -182,12 +185,6 @@ class PointCloudImporter(TileImporter):
CLOUD_OPTIMIZED_VARIANT = "Cloud-Optimized Point Cloud"
CLOUD_OPTIMIZED_VARIANT_ACRONYM = "COPC"

def extract_tile_metadata(self, tile_path, **kwargs):
"""Override to pass override_crs parameter to point cloud metadata extraction."""
if self.override_crs:
kwargs["override_crs"] = self.override_crs
return self.DATASET_CLASS.extract_tile_metadata(tile_path, **kwargs)

def get_default_message(self):
return f"Importing {len(self.sources)} LAZ tiles as {self.dataset_path}"

Expand Down Expand Up @@ -250,12 +247,30 @@ def get_actual_merged_metadata(self, all_metadata):
)

def get_conversion_func(self, tile_source):
if self.convert_to_cloud_optimized and not is_copc(tile_source.metadata):
if self.override_crs:
# When override_crs is specified, we always need to convert
if self.convert_to_cloud_optimized:
# Convert to COPC (or maintain COPC) with CRS override
return lambda source, dest: convert_tile_to_copc(
source, dest, override_srs=self.override_crs
)
else:
# Convert with CRS override, preserving original format
return lambda source, dest: convert_tile_with_crs_override(
source, dest, override_srs=self.override_crs
)
elif self.convert_to_cloud_optimized and not is_copc(tile_source.metadata):
# Convert to COPC without CRS override
return convert_tile_to_copc
return None

def existing_tile_matches_source(self, source_oid, existing_summary):
"""Check if the existing tile can be reused instead of reimporting."""
# If override_crs is specified, we need to rewrite the CRS in the files,
# so we can't reuse existing tiles
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's actually at least a minor issue here: if you import the same files multiple times, each time with the same --override-crs, this will re-import all tiles every time, which is unnecessary.

I'm wondering if we should be checking the CRS of the previous tile itself here (or we could check the CRS of the previous dataset and assume it matches the previous tile, which is much cheaper, especially if the tiles are on LFS). If the previous dataset/tile matches the override, we can possibly return True here (if the sourceOid matches).

if self.override_crs:
return False

source_oid = prefix_sha256(source_oid)

if existing_summary.get("oid") == source_oid:
Expand Down
66 changes: 53 additions & 13 deletions kart/point_cloud/pdal_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,31 @@
from kart import subprocess_util as subprocess


def convert_tile_to_format(source, dest, target_format):
def convert_tile_to_format(source, dest, target_format, override_srs=None):
"""
Converts some sort of a .las/.laz file at source to a tile of the given format at dest.
"""
if is_copc(target_format):
return convert_tile_to_copc(source, dest)
return convert_tile_to_copc(source, dest, override_srs=override_srs)
else:
return convert_tile_to_laz(source, dest, target_format)
return convert_tile_to_laz(
source, dest, target_format, override_srs=override_srs
)


def convert_tile_to_copc(source, dest):
def convert_tile_to_copc(source, dest, override_srs=None):
"""
Converts some sort of a .las/.laz file at source to a .copc.laz file at dest.
"""
reader_stage = {
"type": "readers.las",
"filename": str(source),
}
if override_srs:
reader_stage["override_srs"] = str(override_srs)

pipeline = [
{
"type": "readers.las",
"filename": str(source),
},
reader_stage,
{
"type": "writers.copc",
"filename": str(dest),
Expand All @@ -40,17 +46,21 @@ def convert_tile_to_copc(source, dest):
assert dest.is_file()


def convert_tile_to_laz(source, dest, target_format):
def convert_tile_to_laz(source, dest, target_format, override_srs=None):
"""
Converts some sort of .las/.laz at source to some sort of .laz file at dest.
"""
major_version, minor_version = get_las_version(target_format).split(".", maxsplit=1)

reader_stage = {
"type": "readers.las",
"filename": str(source),
}
if override_srs:
reader_stage["override_srs"] = str(override_srs)

pipeline = [
{
"type": "readers.las",
"filename": str(source),
},
reader_stage,
{
"type": "writers.las",
"filename": str(dest),
Expand All @@ -68,3 +78,33 @@ def convert_tile_to_laz(source, dest, target_format):
f"Error converting {source}\n{e}", exit_code=INVALID_FILE_FORMAT
)
assert dest.is_file()


def convert_tile_with_crs_override(source, dest, override_srs):
    """
    Rewrites the .las/.laz file at source to dest with its CRS overridden by override_srs.

    This is used when --override-crs is specified but no other conversion is needed.
    The pipeline is a single readers.las stage (with override_srs set) feeding a
    writers.las stage that has forward and extra_dims set to "all" and compression enabled.

    Raises InvalidOperation with exit code INVALID_FILE_FORMAT if the pipeline fails.
    """
    las_reader = dict(
        type="readers.las",
        filename=str(source),
        override_srs=str(override_srs),
    )
    las_writer = dict(
        type="writers.las",
        filename=str(dest),
        forward="all",
        extra_dims="all",
        compression=True,
    )

    try:
        pdal_execute_pipeline([las_reader, las_writer])
    except subprocess.CalledProcessError as e:
        raise InvalidOperation(
            f"Error converting {source}\n{e}", exit_code=INVALID_FILE_FORMAT
        )
    # The pipeline reported success, so the output tile is expected to exist.
    assert dest.is_file()
78 changes: 75 additions & 3 deletions kart/raster/gdal_convert.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import subprocess
from pathlib import Path
from osgeo import gdal

from kart.raster.metadata_util import is_cog
Expand All @@ -14,25 +16,95 @@
gdal.SetConfigOption("GDAL_TIFF_OVR_BLOCKSIZE", "512")


def convert_tile_to_format(source, dest, target_format):
def convert_tile_to_format(source, dest, target_format, override_srs=None):
"""
Converts any GeoTIFF file at source to a tile of the given format at dest.
"""
# convert-to-COG is the only tile conversion supported or required, so far.
assert is_cog(target_format)
return convert_tile_to_cog(source, dest)
return convert_tile_to_cog(source, dest, override_srs=override_srs)


def convert_tile_to_cog(source, dest):
def convert_tile_to_cog(source, dest, override_srs=None):
"""
Converts any GeoTIFF file at source to a cloud-optimized GeoTIFF file at dest.
"""
translate_options = gdal.TranslateOptions(
format="COG",
# Most of the GeoTIFF creation options don't affect the COG driver, so not much to put here.
creationOptions=["BIGTIFF=IF_SAFER"],
# Override the source CRS if specified
outputSRS=str(override_srs) if override_srs else None,
)

gdal.Translate(str(dest), str(source), options=translate_options)

assert dest.is_file()


def convert_tile_with_crs_override(source: Path, dest: Path, override_srs: str):
    """
    Rewrites the GeoTIFF at source as a GeoTIFF at dest, with override_srs as its CRS.

    This is used when --override-crs is specified but no other conversion is needed.
    The source's block size, compression, predictor and interleave are passed through
    as creation options, and the COPY_SRC_OVERVIEWS / COPY_SRC_MDD options are enabled.
    A source with no compression is written with ZSTD compression instead.
    dest must not already exist.
    """
    input_ds = gdal.Open(str(source))
    assert input_ds is not None
    assert not dest.exists()

    # Block layout is read from the first band.
    first_band = input_ds.GetRasterBand(1)
    block_width, block_height = first_band.GetBlockSize()

    options = [
        "BIGTIFF=IF_SAFER",
        "COPY_SRC_OVERVIEWS=YES",
        "COPY_SRC_MDD=YES",
        f"BLOCKXSIZE={block_width}",
        f"BLOCKYSIZE={block_height}",
    ]
    if block_width != input_ds.RasterXSize:
        # Blocks narrower than the raster are taken to mean the source is tiled.
        options.append("TILED=YES")

    src_compression = input_ds.GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE")
    is_uncompressed = not src_compression or src_compression.upper() == "NONE"
    if is_uncompressed:
        # Writing without compression is very inefficient, so we are opinionated
        # here and add some. The predictor is chosen per data type, following
        # https://kokoalberti.com/articles/geotiff-compression-optimization-guide/
        type_name = gdal.GetDataTypeName(first_band.GetDataType())
        zstd_predictor = 3 if "Float" in type_name else 2
        options += ["COMPRESS=ZSTD", f"PREDICTOR={zstd_predictor}"]
    else:
        # Keep whatever compression (and predictor, if any) the source already has.
        options.append(f"COMPRESS={src_compression}")
        src_predictor = input_ds.GetMetadataItem("PREDICTOR", "IMAGE_STRUCTURE")
        if src_predictor:
            options.append(f"PREDICTOR={src_predictor}")

    src_interleave = input_ds.GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE")
    if src_interleave:
        options.append(f"INTERLEAVE={src_interleave}")

    # Write the output with the overriding CRS and the creation options gathered above.
    output_ds = gdal.Translate(
        str(dest),
        input_ds,
        options=gdal.TranslateOptions(
            format="GTiff",
            creationOptions=options,
            outputSRS=str(override_srs),
        ),
    )

    # Drop our references to the datasets so that they get closed.
    input_ds = None
    output_ds = None

    assert dest.is_file()
49 changes: 42 additions & 7 deletions kart/raster/import_.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
MutexOption,
KartCommand,
)
from kart.crs_util import CoordinateReferenceString
from kart.completion_shared import file_path_completer
from kart.lfs_util import prefix_sha256
from kart.parse_args import parse_import_sources_and_datasets
from kart.raster.gdal_convert import convert_tile_to_cog
from kart.raster.gdal_convert import convert_tile_to_cog, convert_tile_with_crs_override
from kart.raster.metadata_util import (
rewrite_and_merge_metadata,
is_cog,
Expand Down Expand Up @@ -111,6 +112,14 @@
"the authoritative source for the given data and data is fetched from there if needed."
),
)
@click.option(
"--override-crs",
type=CoordinateReferenceString(keep_as_string=True),
help=(
"Override the CRS of all source tiles and set the dataset CRS. "
"Can be specified as EPSG code (e.g., EPSG:4326) or as a WKT file (e.g., @myfile.wkt)."
),
)
@click.argument(
"args",
nargs=-1,
Expand All @@ -130,6 +139,7 @@ def raster_import(
num_workers,
dataset_path,
do_link,
override_crs,
args,
):
"""
Expand Down Expand Up @@ -161,6 +171,7 @@ def raster_import(
num_workers=num_workers,
do_link=do_link,
sources=sources,
override_crs=override_crs,
).import_tiles()


Expand All @@ -182,31 +193,55 @@ def check_metadata_post_convert(self):
pass

def get_merged_source_metadata(self, all_metadata):
return rewrite_and_merge_metadata(all_metadata, RewriteMetadata.DROP_PROFILE)
return rewrite_and_merge_metadata(
all_metadata, RewriteMetadata.DROP_PROFILE, override_crs=self.override_crs
)

def get_predicted_merged_metadata(self, all_metadata):
rewrite_metadata = (
RewriteMetadata.AS_IF_CONVERTED_TO_COG
if self.convert_to_cloud_optimized
else RewriteMetadata.DROP_PROFILE
)
return rewrite_and_merge_metadata(all_metadata, rewrite_metadata)
return rewrite_and_merge_metadata(
all_metadata, rewrite_metadata, override_crs=self.override_crs
)

def get_actual_merged_metadata(self, all_metadata):
rewrite_metadata = (
RewriteMetadata.NO_REWRITE
if self.convert_to_cloud_optimized
else RewriteMetadata.DROP_PROFILE
)
return rewrite_and_merge_metadata(all_metadata, rewrite_metadata)
return rewrite_and_merge_metadata(
all_metadata, rewrite_metadata, override_crs=self.override_crs
)

def get_conversion_func(self, tile_source):
if self.convert_to_cloud_optimized and not is_cog(tile_source.metadata):
if self.override_crs:
# When override_crs is specified, we always need to convert
if self.convert_to_cloud_optimized:
# Convert to COG (or maintain COG) with CRS override
return lambda source, dest: convert_tile_to_cog(
source, dest, override_srs=self.override_crs
)
else:
# Convert with CRS override, preserving original format
return lambda source, dest: convert_tile_with_crs_override(
source, dest, override_srs=self.override_crs
)
elif self.convert_to_cloud_optimized and not is_cog(tile_source.metadata):
# Convert to COG without CRS override
return convert_tile_to_cog
return None

def existing_tile_matches_source(self, source_oid, existing_summary):
"""Check if the existing tile can be reused instead of reimporting."""
# If override_crs is specified, we need to rewrite the CRS in the files,
# so we can't reuse existing tiles
if self.override_crs:
return False

source_oid = prefix_sha256(source_oid)

if existing_summary.get("oid") == source_oid:
Expand All @@ -219,8 +254,8 @@ def existing_tile_matches_source(self, source_oid, existing_summary):

# NOTE: this logic would be more complicated if we supported more than one type of conversion.
if existing_summary.get("sourceOid") == source_oid:
# The import source we were given has already been imported, but converted to COPC.
# Return True if we were going to convert it to COPC too.
# The import source we were given has already been imported, but converted to COG.
# Return True if we were going to convert it to COG too.
return self.convert_to_cloud_optimized and is_cog(existing_summary)

return False
Loading
Loading