Commit 40d8dbf1 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Merge branch 'bugfix/fix_save_COG' into 'master'

Ensure COG format compatility in GeoArray.save().

See merge request !27
parents 9c0963af d9bcfc76
Pipeline #26322 failed with stages
in 11 minutes and 6 seconds
*.zip filter=lfs diff=lfs merge=lfs -text
*.bsq filter=lfs diff=lfs merge=lfs -text
*.tif filter=lfs diff=lfs merge=lfs -text
......@@ -2,6 +2,20 @@
History
=======
0.14.1 (coming soon)
--------------------
* Revised GeoArray.save() which fixes two GDAL warnings when writing cloud optimized GeoTiff (COG) format:
* *Warning 1*: This file used to have optimizations in its layout, but those have been, at least partly,
invalidated by later changes
* *Warning 2*: The IFD has been rewritten at the end of the file, which breaks COG layout.
* Fixed unsupported band-specific nodata values in case of ENVI output format.
* Added two tests that validate the metadata written by GeoArray.save() for ENVI and GTiff format.
* Track tif and bsq files with git-lfs.
0.14.0 (2021-07-26)
-------------------
......
......@@ -998,65 +998,51 @@ class GeoArray(object):
driver = gdal.GetDriverByName(fmt)
if driver is None:
raise Exception("'%s' is not a supported GDAL driver. Refer to www.gdal.org/formats_list.html for full "
"list of GDAL driver codes." % fmt)
raise Exception("'%s' is not a supported GDAL driver. Refer to https://gdal.org/drivers/raster/index.html "
"for full list of GDAL driver codes." % fmt)
if not os.path.isdir(os.path.dirname(out_path)):
os.makedirs(os.path.dirname(out_path))
envi_metadict = self.metadata.to_ENVI_metadict()
#####################
# write raster data #
#####################
###########################
# get source GDAL dataset #
###########################
if self.is_inmem:
# write in-memory data
ds_inmem = get_GDAL_ds_inmem(self.arr, # expects rows,columns,bands
self.geotransform, self.projection,
self._nodata) # avoid to compute the nodata value here, so use private attrib.
# write dataset
ds_out = driver.CreateCopy(out_path, ds_inmem,
options=creationOptions or []) # noqa
ds_src: gdal.Dataset
ds_out: gdal.Dataset
# # rows, columns, bands => bands, rows, columns
# out_arr = self.arr if self.ndim == 2 else np.swapaxes(np.swapaxes(self.arr, 0, 2), 1, 2)
# gdalnumeric.SaveArray(out_arr, out_path, format=fmt, prototype=ds_inmem) # expects bands,rows,columns
# ds_out = gdal.Open(out_path)
del ds_inmem
del ds_out
if self.is_inmem:
ds_src = get_GDAL_ds_inmem(self.arr, # expects rows,columns,bands
self.geotransform, self.projection,
self._nodata) # avoid to compute the nodata value here, so use private attrib.
else:
# transform linked image dataset (not in-memory) to target format
src_ds = gdal.Open(self.filePath)
if not src_ds:
raise Exception('Error reading file: ' + gdal.GetLastErrorMsg())
ds_src = gdal.Open(self.filePath)
# metadomains = {dom: src_ds.GetMetadata(dom) for dom in src_ds.GetMetadataDomainList()}
gdal.Translate(out_path, src_ds, format=fmt, creationOptions=creationOptions)
del src_ds
if not os.path.exists(out_path):
raise Exception(gdal.GetLastErrorMsg())
################
# set metadata #
################
# NOTE: The dataset has to be written BEFORE metadata are added. Otherwise, metadata are not written.
if not ds_src:
raise Exception('Error reading file: ' + gdal.GetLastErrorMsg())
ds_out = gdal.Open(out_path, gdal.GA_Update)
#########################################
# write output dataset and set metadata #
#########################################
try:
if not ds_out:
raise Exception('Error reading file: ' + gdal.GetLastErrorMsg())
# disable to write separate metadata XML files
os.environ['GDAL_PAM_ENABLED'] = 'NO'
# ENVI #
########
if fmt == 'ENVI':
# NOTE: The dataset has to be written BEFORE metadata are added. Otherwise, metadata are not written.
# write ds_src to disk and re-open it to add the metadata
gdal.Translate(out_path, ds_src, format=fmt, creationOptions=creationOptions)
del ds_src
ds_out = gdal.Open(out_path, gdal.GA_Update)
for bidx in range(self.bands):
band = ds_out.GetRasterBand(bidx + 1)
......@@ -1065,24 +1051,22 @@ class GeoArray(object):
band.SetDescription(bandname)
assert band.GetDescription() == bandname
# NOTE: band-specific nodata values are not supported by the ENVI header format
if 'nodata' in envi_metadict:
nodataVal = self.metadata.band_meta['nodata'][bidx]
band.SetNoDataValue(nodataVal)
assert band.GetNoDataValue() == nodataVal
del band
# avoid that band names are written to global meta
if 'band_names' in envi_metadict:
del envi_metadict['band_names']
# the expected key name is 'data_ignore_value', see below
if 'nodata' in envi_metadict:
del envi_metadict['nodata']
# set data_ignore_value in case self.metadata.band_meta contains a unique nodata value
if 'nodata' in self.metadata.band_meta and len(set(self.metadata.band_meta['nodata'])) == 1:
envi_metadict['data_ignore_value'] = str(self.metadata.band_meta['nodata'][0])
if 'nodata' in self.metadata.band_meta:
if len(set(self.metadata.band_meta['nodata'])) == 1:
envi_metadict['data_ignore_value'] = str(self.metadata.band_meta['nodata'][0])
else:
warnings.warn("Band-specific nodata values are not supported by the ENVI header format.")
ds_out.SetMetadata(envi_metadict, 'ENVI')
......@@ -1090,44 +1074,60 @@ class GeoArray(object):
ds_out.SetDescription(envi_metadict['description'])
ds_out.FlushCache()
gdal.Unlink(out_path + '.aux.xml')
# gdal.Unlink(out_path + '.aux.xml')
elif self.metadata.all_meta:
# set global domain metadata
if self.metadata.global_meta:
ds_out.SetMetadata(dict((k, repr(v)) for k, v in self.metadata.global_meta.items()))
else:
ds_out = ds_src
if 'description' in envi_metadict:
ds_out.SetDescription(envi_metadict['description'])
# set metadata
if self.metadata.all_meta:
# set band domain metadata
bandmeta_dict = self.metadata.to_DataFrame().astype(str).to_dict()
# set global domain metadata
if self.metadata.global_meta:
ds_out.SetMetadata(dict((k, repr(v)) for k, v in self.metadata.global_meta.items()))
for bidx in range(self.bands):
band = ds_out.GetRasterBand(bidx + 1)
bandmeta = bandmeta_dict[bidx].copy()
if 'description' in envi_metadict:
ds_out.SetDescription(envi_metadict['description'])
# filter global metadata out
bandmeta = {k: v for k, v in bandmeta.items() if k not in self.metadata.global_meta}
# meta2write = dict((k, repr(v)) for k, v in self.metadata.band_meta.items() if v is not np.nan)
# set band domain metadata
bandmeta_dict = self.metadata.to_DataFrame().astype(str).to_dict()
if 'band_names' in bandmeta:
band.SetDescription(self.metadata.band_meta['band_names'][bidx].strip())
del bandmeta['band_names']
for bidx in range(self.bands):
band = ds_out.GetRasterBand(bidx + 1)
bandmeta = bandmeta_dict[bidx].copy()
if 'nodata' in bandmeta:
band.SetNoDataValue(self.metadata.band_meta['nodata'][bidx])
del bandmeta['nodata']
# filter global metadata out
bandmeta = {k: v for k, v in bandmeta.items() if k not in self.metadata.global_meta}
# meta2write = dict((k, repr(v)) for k, v in self.metadata.band_meta.items() if v is not np.nan)
if bandmeta:
band.SetMetadata(bandmeta)
if 'band_names' in bandmeta:
band.SetDescription(self.metadata.band_meta['band_names'][bidx].strip())
del bandmeta['band_names']
band.FlushCache()
del band
if 'nodata' in bandmeta:
band.SetNoDataValue(self.metadata.band_meta['nodata'][bidx])
del bandmeta['nodata']
ds_out.FlushCache()
if bandmeta:
band.SetMetadata(bandmeta)
band.FlushCache()
del band
ds_out.FlushCache()
# write ds_out to disk,
# -> writes the in-memory array or transforms the linked dataset into the target format
gdal.Translate(out_path, ds_out, format=fmt, creationOptions=creationOptions)
del ds_src
finally:
if 'GDAL_PAM_ENABLED' in os.environ:
del os.environ['GDAL_PAM_ENABLED']
if not os.path.exists(out_path):
raise Exception(gdal.GetLastErrorMsg())
del ds_out
def dump(self, out_path):
......
No preview for this file type
ENVI
description = {
File Resize Result, x resize factor: 1.000000, y resize factor: 1.000000.
[Wed Aug 08 13:35:48 2018]}
File Resize Result, x resize factor: 1.000000, y resize factor: 1.000000. [Wed Aug 08 13:35:48 2018]}
samples = 5
lines = 5
bands = 10
......@@ -9,32 +8,65 @@ header offset = 0
file type = ENVI Standard
data type = 2
interleave = bsq
sensor type = Unknown
byte order = 0
x start = 350
y start = 10500
map info = {UTM, 1.000, 1.000, 287232.433, 4080098.314, 1.4200000000e+001, 1.4200000000e+001, 11, North, WGS-84, units=Meters, rotation=-12.00000000}
map info = {UTM, 1, 1, 287232.433, 4080098.314, 14.2, 14.2, 11, North,WGS-84, rotation=-12}
coordinate system string = {PROJCS["UTM_Zone_11N",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-117.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]}
wavelength units = Nanometers
band names = {
Resize (Band 5:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 6:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 7:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 8:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 9:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 10:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 11:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 12:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 13:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 14:f141006t01p00r16_refl_BSQ.bsq)}
wavelength = {
404.596649, 414.278656, 423.964661, 433.654663, 443.349670, 453.049652,
462.752655, 472.460663, 482.173645, 491.890656}
fwhm = {
9.645100, 9.599000, 9.555200, 9.513600, 9.474300, 9.437300, 9.402500,
9.370000, 9.339700, 9.311700}
bbl = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
correction factors = {
1.10059, 1.08712, 1.0534, 0.99914, 0.98411, 0.99055, 0.96899, 0.98119,
0.9779, 0.97234}
Resize (Band 5:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 6:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 7:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 8:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 9:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 10:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 11:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 12:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 13:f141006t01p00r16_refl_BSQ.bsq),
Resize (Band 14:f141006t01p00r16_refl_BSQ.bsq)}
bbl = { 1,
1,
1,
1,
1,
1,
1,
1,
1,
1 }
corner coordinates lonlat = [[72.45254791675194, 40.30295607534694], [72.74642602223325, 40.30905731897239], [72.46095641250194, 40.07798579894774], [72.75386662265068, 40.08403894986721]]
correction factors = { 1.100590,
1.087120,
1.053400,
0.999140,
0.984110,
0.990550,
0.968990,
0.981190,
0.977900,
0.972340 }
data gain values = 0.010000, 0.010000
data offset values = 0, 0
EarthSunDist = 1.005349
fwhm = { 9.645100,
9.599000,
9.555200,
9.513600,
9.474300,
9.437300,
9.402500,
9.370000,
9.339700,
9.311700 }
sensor type = Unknown
wavelength = { 404.596649,
414.278656,
423.964661,
433.654663,
443.349670,
453.049652,
462.752655,
472.460663,
482.173645,
491.890656 }
wavelength units = Nanometers
x start = 350
y start = 10500
......@@ -34,6 +34,9 @@ import tempfile
import os
from collections import OrderedDict
from typing import Iterable
from tempfile import TemporaryDirectory
from difflib import unified_diff
from subprocess import getoutput
import numpy as np
from shapely.geometry import Polygon
......@@ -52,18 +55,21 @@ __author__ = ["Daniel Scheffler", "Jessica Palka"]
# Path of the tests-directory in the geoarray-package.
tests_path = os.path.abspath(os.path.join(geoarray_root, "..", ".."))
path_data = os.path.join(tests_path, "tests", "data", "L8_2bands_extract10x11.tif")
path_data_tif = os.path.join(tests_path, "tests", "data", "L8_2bands_extract10x11.tif")
path_subset_bsq = os.path.join(tests_path, "tests", "data", "subset_metadata.bsq")
path_subset_hdr = os.path.join(tests_path, "tests", "data", "subset_metadata.hdr")
path_subset_tif = os.path.join(tests_path, "tests", "data", "subset_metadata.tif")
# disable matplotlib figure popups
matplotlib.use('Template') # disables matplotlib figure popups
def _get_gA_inMem_notInMem():
gA_inMem = GeoArray(path_data)
def _get_gA_inMem_notInMem(path_file=path_data_tif):
gA_inMem = GeoArray(path_file)
gA_inMem.to_mem()
gA_inMem.filePath = ""
gA_notInMem = GeoArray(path_data)
gA_notInMem = GeoArray(path_file)
params_inMem_NotInMem = [("inMem", gA_inMem),
("notInMem", gA_notInMem)]
......@@ -84,10 +90,10 @@ class Test_GeoArray(TestCase):
with tempfile.TemporaryDirectory() as td:
path_zip = os.path.join(td, 'temp_archive.zip')
file_basename = os.path.basename(path_data)
file_basename = os.path.basename(path_data_tif)
with zipfile.ZipFile(path_zip, 'w') as zf:
zf.write(path_data, file_basename)
zf.write(path_data_tif, file_basename)
gA = GeoArray(f'/vsizip/{path_zip}/{file_basename}')
self.assertIsInstance(gA[:], np.ndarray)
......@@ -482,10 +488,49 @@ class Test_GeoArray(TestCase):
# TODO validate written metadata (inMem, notInMem, in case a notInMem dataset already has custom metadata)
@parameterized.expand(_get_gA_inMem_notInMem(path_subset_bsq))
def test_save_meta_ENVI(self, _, gA):
with TemporaryDirectory(prefix='geoarray__') as td:
p_out = os.path.join(td, 'outfile.bsq')
gA.save(p_out, fmt='ENVI')
# compare gdalinfo output
gdalinfo_out = [i + '\n' for i in getoutput(f'gdalinfo -mdd ENVI {p_out}').split('\n')]
gdalinfo_orig = [i + '\n' for i in getoutput(f'gdalinfo -mdd ENVI {path_subset_bsq}').split('\n')]
diff = ''.join(list(unified_diff(gdalinfo_orig[3:], gdalinfo_out[3:])))
self.assertFalse(diff)
# compare header files
with open(p_out[:-4] + '.hdr', 'r') as inF:
new = inF.readlines()
with open(path_subset_hdr, 'r') as inF:
old = inF.readlines()
diff = ''.join(list(unified_diff(old[3:], new[3:])))
self.assertFalse(diff)
# os.system(f'gdalinfo {p_out}')
@parameterized.expand(_get_gA_inMem_notInMem(path_subset_tif))
def test_save_meta_GTiff(self, _, gA):
with TemporaryDirectory(prefix='geoarray__') as td:
p_out = os.path.join(td, 'outfile.tif')
gA.save(p_out, fmt='GTiff')
# compare gdalinfo output
gdalinfo_out = [i + '\n' for i in getoutput(f'gdalinfo {p_out}').split('\n')]
gdalinfo_orig = [i + '\n' for i in getoutput(f'gdalinfo {path_subset_tif}').split('\n')]
diff = ''.join(list(unified_diff(gdalinfo_orig[2:], gdalinfo_out[2:])))
self.assertFalse(diff)
# os.system(f'gdalinfo {p_out}')
def test_show(self):
# test 3D case
self.gA.show()
self.gA.show(interactive=True) # only works if test is started with ipython.
self.gA.show(interactive=True) # only works if test is started with IPython.
# test 2D case
gA = GeoArray(self.gA[:, :, 0])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment