|
1 | 1 | import logging |
2 | 2 | import math |
| 3 | +import re |
| 4 | +from collections import defaultdict |
| 5 | +from datetime import datetime |
| 6 | +from pathlib import Path |
| 7 | +from typing import Literal |
3 | 8 |
|
4 | 9 | import dask |
5 | 10 | import dask.array as da |
6 | 11 | import numpy as np |
7 | 12 | import xarray as xr |
8 | 13 |
|
9 | | -from .conf import _dtypes, _memsize_chunk_mb |
| 14 | +from .conf import ( |
| 15 | + META_FLOAT_KEYS, |
| 16 | + META_INT_KEYS, |
| 17 | + RE_PATTERNS_DORIS4, |
| 18 | + RE_PATTERNS_DORIS5, |
| 19 | + RE_PATTERNS_DORIS5_IFG, |
| 20 | + TIME_FORMAT_DORIS4, |
| 21 | + TIME_FORMAT_DORIS5, |
| 22 | + TIME_STAMP_KEY, |
| 23 | + _dtypes, |
| 24 | + _memsize_chunk_mb, |
| 25 | +) |
10 | 26 |
|
11 | 27 | logger = logging.getLogger(__name__) |
12 | 28 |
|
13 | | -# Example: https://docs.dask.org/en/stable/array-creation.html#memory-mapping |
14 | | - |
15 | 29 |
|
16 | 30 | def from_dataset(ds: xr.Dataset) -> xr.Dataset: |
17 | 31 | """Create a SLC stack or from an Xarray Dataset. |
@@ -266,3 +280,200 @@ def _calc_chunksize(shape: tuple, dtype: np.dtype, ratio: int): |
266 | 280 | chunks = (chunks_az, chunks_ra) |
267 | 281 |
|
268 | 282 | return chunks |
| 283 | + |
| 284 | + |
def read_metadata(
    files: str | list | Path,
    driver: Literal["doris4", "doris5"] = "doris5",
    ifg_file_name: str = "ifgs.res",
) -> dict:
    """Read metadata of a coregistered interferogram stack.

    This function reads metadata from one or more metadata files from a coregistered
    interferogram stack, and returns the metadata as a dictionary format.

    This function supports two drivers: "doris4" for DORIS4 metadata files, e.g.
    coregistration results from TerraSAR-X; "doris5" for DORIS5 metadata files,
    e.g. coregistration results from Sentinel-1. More support for other drivers
    will be added in the future.

    For drivers "doris4" and "doris5", it parses the metadata with predefined regular
    expressions, returning a dictionary with predefined keys. Check conf.py for
    available keys and regular expressions.

    Specifically for the "doris5" driver, it is assumed that there is a "ifgs.res" file
    next to the input metadata file, which contains the interferogram size information.
    If the "ifgs.res" file is not found, the interferogram size information will
    not be included in the metadata.

    If a single file is provided, it reads the metadata from that file.

    If multiple files are provided, the function will read the metadata from each file,
    and combine the results based on the following rules:
    - If a metadata key has values in string format or integer format, it combines the
      values into a set.
    - If a metadata key has values in float format, and the standard deviation is less
      than 1% of the mean, it takes the average of the values.
    - For the two Doris drivers "doris4" or "doris5", if the metadata key is
      TIME_STAMP_KEY, it treats it as the timestamp of acquisition and
      converts it to a numpy array of datetime64 format, sorted in ascending order.

    Parameters
    ----------
    files : str | list | Path
        Path(s) to the metadata files.
    driver : str, optional
        The driver to use for reading metadata. Supported drivers are "doris4" and
        "doris5". Default is "doris5".
    ifg_file_name : str, optional
        The name of the interferogram size file for the "doris5" driver.
        We assume this file is next to each metadata file and use it to read the
        interferogram size information. If it is not found, the size
        information will not be included in the metadata. Default is "ifgs.res".

    Returns
    -------
    dict
        Dictionary containing the metadata read from the files.

    Raises
    ------
    NotImplementedError
        If the driver is not "doris4" or "doris5".
    """
    # Check driver
    if driver not in ["doris4", "doris5"]:
        raise NotImplementedError(
            f"Driver '{driver}' is not implemented. "
            "Supported drivers are: 'doris4', 'doris5'."
        )

    # If there is only one file, convert it to a list
    if not isinstance(files, list):
        files = [files]

    # Force all files to be Path objects in case files is a list of strings
    files = [Path(file) for file in files]

    # Parse metadata from each file.
    # defaultdict(list): a key seen for the first time starts a fresh list.
    metadata = defaultdict(list)
    for file in files:
        # Fix: forward ifg_file_name so a non-default interferogram file
        # name supplied by the caller is actually honored (it was silently
        # dropped before, always falling back to the helper's own default).
        res = _parse_metadata(file, driver, ifg_file_name=ifg_file_name)
        for key, value in res.items():
            metadata[key].append(value)

    # Combine/convert the per-file value lists according to the rules above
    metadata = _regulate_metadata(metadata, driver)

    return metadata
| 371 | + |
| 372 | + |
def _parse_metadata(file, driver, ifg_file_name="ifgs.res"):
    """Parse a single metadata file to a dictionary of strings.

    Searches the file content with the driver-specific regex patterns from
    conf.py; a key whose pattern does not match is set to None. For the
    "doris5" driver, interferogram size patterns are additionally read from
    the `ifg_file_name` file next to `file`, if present.
    """
    # Select the appropriate patterns based on the driver
    if driver == "doris5":
        patterns = RE_PATTERNS_DORIS5
        patterns_ifg = RE_PATTERNS_DORIS5_IFG
    elif driver == "doris4":
        patterns = RE_PATTERNS_DORIS4
        patterns_ifg = None
    else:
        # Defensive: read_metadata validates the driver already, but fail
        # with a clear error (instead of an UnboundLocalError below) if this
        # helper is ever called directly with an unknown driver.
        raise NotImplementedError(
            f"Driver '{driver}' is not implemented. "
            "Supported drivers are: 'doris4', 'doris5'."
        )

    # Open the file
    with open(file) as f:
        content = f.read()

    # Read common metadata patterns; group(1) holds the captured value
    results = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, content)
        results[key] = match.group(1) if match else None

    # Doris5 has size information in ifgs.res file.
    # Try to get the ifg size from ifgs.res next to slave.res, if it exists.
    if patterns_ifg is not None:
        file_ifg = file.with_name(ifg_file_name)
        if file_ifg.exists():
            with open(file_ifg) as f_ifg:
                content_ifg = f_ifg.read()
            # Fix: iterate the driver-selected patterns_ifg local instead of
            # hard-coding RE_PATTERNS_DORIS5_IFG, keeping the driver dispatch
            # in one place.
            for key, pattern in patterns_ifg.items():
                match = re.search(pattern, content_ifg)
                results[key] = match.group(1) if match else None

    return results
| 411 | + |
| 412 | + |
def _regulate_metadata(metadata, driver):
    """Regulate metadata strings.

    This function processes the metadata read from the DORIS files, which are strings,
    and converts according to the types specified in META_FLOAT_KEYS and META_INT_KEYS.

    Check the documentation of `read_metadata` for the rules applied to the metadata.

    Raises
    ------
    ValueError
        If a timestamp does not match the driver's time format, or a float key's
        values are inconsistent (relative std >= 1% of the mean).
    TypeError
        If mixed Python types are found in one key's value list.
    """
    # Convert time metadata from string to datetime
    if driver == "doris5":
        time_format = TIME_FORMAT_DORIS5
    elif driver == "doris4":
        time_format = TIME_FORMAT_DORIS4
    list_time = []
    # If the time is a single string, convert it to a list
    if isinstance(metadata[TIME_STAMP_KEY], str):
        metadata[TIME_STAMP_KEY] = [metadata[TIME_STAMP_KEY]]
    for time in metadata[TIME_STAMP_KEY]:
        try:
            dt = datetime.strptime(time, time_format)
            list_time.append(np.datetime64(dt).astype("datetime64[s]"))
        except ValueError as e:
            raise ValueError(
                f"Invalid date format for key: '{TIME_STAMP_KEY}'. "
                f"Expected format is '{time_format}'."
            ) from e
    metadata[TIME_STAMP_KEY] = np.sort(np.array(list_time))

    for key, value in list(metadata.items()):
        # Raise error if different types are found in value
        if len({type(v) for v in value}) > 1:
            raise TypeError(
                f"Inconsistency found in metadata key: {key}. "
                "Different types are found in the value list."
            )

        # Only keep the unique values
        if isinstance(metadata[key], list):
            metadata[key] = set(value)

        # Unfold the single value set to strings
        if len(metadata[key]) == 1:
            metadata[key] = next(iter(metadata[key]))

        # If float, take the average unless std is larger than 1% of the mean
        if key in META_FLOAT_KEYS:
            # Convert to float (note: `value` is the original per-file list,
            # duplicates included, so the statistics weight each file equally)
            arr = np.array(value, dtype=np.float64)
            mean = np.mean(arr)
            std = np.std(arr)
            # Fix 1: compare against abs(mean) — with a negative mean the old
            # ratio std/mean was always < 0.01, silently averaging
            # inconsistent values.
            # Fix 2: treat an all-equal array (std == 0) as consistent even
            # when mean == 0, avoiding a 0/0 -> nan spurious failure.
            if std == 0 or (mean != 0 and std / abs(mean) < 0.01):
                metadata[key] = mean.item()  # Convert to scalar
            else:
                raise ValueError(
                    f"Inconsistency found in metadata key: {key}. "
                    "Standard deviation is larger than 1% of the mean."
                )
        if key in META_INT_KEYS:
            if isinstance(metadata[key], str):
                metadata[key] = int(metadata[key])
            elif len(metadata[key]) > 1:  # set with multiple values
                metadata[key] = {int(v) for v in metadata[key]}

        if key in ["number_of_lines", "number_of_pixels"]:
            if isinstance(metadata[key], set):
                warning_msg = f"Multiple values found in {key}: {metadata[key]}."
                logger.warning(warning_msg)

    return metadata
0 commit comments