From 0fc41255e1f41064a4e398f0f30f46f5db077cf9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 27 Oct 2025 17:12:06 +0000 Subject: [PATCH 1/6] dev --- cf/test/test_UGRID.py | 140 ++++++++++++++++++++++++++++++++----- cf/test/test_read_write.py | 7 +- 2 files changed, 124 insertions(+), 23 deletions(-) diff --git a/cf/test/test_UGRID.py b/cf/test/test_UGRID.py index 7d80ba3166..103faa78e8 100644 --- a/cf/test/test_UGRID.py +++ b/cf/test/test_UGRID.py @@ -1,10 +1,12 @@ import atexit import datetime import faulthandler +import itertools import os import tempfile import unittest +import netCDF4 import numpy as np faulthandler.enable() # to debug seg faults and timeouts @@ -14,12 +16,12 @@ warnings = False # Set up temporary files -n_tmpfiles = 1 +n_tmpfiles = 2 tmpfiles = [ - tempfile.mkstemp("_test_read_write.nc", dir=os.getcwd())[1] + tempfile.mkstemp("_test_ugrid.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] -[tmpfile1] = tmpfiles +[tmpfile, tmpfile1] = tmpfiles def _remove_tmpfiles(): @@ -34,6 +36,22 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) +def n_mesh_variables(filename): + """Return the number of mesh variables in the file.""" + nc = netCDF4.Dataset(filename, "r") + n = 0 + for v in nc.variables.values(): + try: + v.getncattr("topology_dimension") + except AttributeError: + pass + else: + n += 1 + + nc.close() + return n + + class UGRIDTest(unittest.TestCase): """Test UGRID field constructs.""" @@ -76,10 +94,6 @@ def test_UGRID_read(self): g.cell_connectivity().get_connectivity(), "edge" ) - # Check that all fields have the same mesh id - mesh_ids1 = set(g.get_mesh_id() for g in f1) - self.assertEqual(len(mesh_ids1), 1) - f2 = cf.read(self.filename2) self.assertEqual(len(f2), 3) for g in f2: @@ -98,13 +112,6 @@ def test_UGRID_read(self): g.cell_connectivity().get_connectivity(), "edge" ) - # Check that all fields have the same mesh id - mesh_ids2 = set(g.get_mesh_id() for g in f2) - self.assertEqual(len(mesh_ids2), 1) - - # Check that the different files have different mesh ids - self.assertNotEqual(mesh_ids1, mesh_ids2) - def test_UGRID_data(self): """Test reading of UGRID data.""" node1, face1, edge1 = cf.read(self.filename1) @@ -177,9 +184,108 @@ def test_read_UGRID_domain(self): g.cell_connectivity().get_connectivity(), "edge" ) - # Check that all domains have the same mesh id - mesh_ids1 = set(g.get_mesh_id() for g in d1) - self.assertEqual(len(mesh_ids1), 1) + def test_read_write_UGRID_field(self): + """Test the cf.read and cf.write with UGRID fields.""" + # Face, edge, and point fields that are all part of the same + # UGRID mesh + ugrid = cf.example_fields(8, 9, 10) + + face, edge, point = (0, 1, 2) + + tmpfile = "tmpfileu.nc" + # Test for equality with the fields defined in memory. Only + # works for face and edge fields. + for cell in (face, edge): + f = ugrid[cell] + cf.write(f, tmpfile) + g = cf.read(tmpfile) + self.assertEqual(len(g), 1) + self.assertTrue(g[0].equals(f)) + + # Test round-tripping fields with multiple fields + # + # Get the indices of 'ugrid' for all possible combinations of + # fields: + # + # combinations = [(0,), (1,), ..., (2, 0, 1), (2, 1, 0)] + combinations = [ + i + for n in range(1, 4) + for i in itertools.permutations([face, edge, point], n) + ] + + for cells in combinations: + f = [] + for cell in cells: + f.append(ugrid[cell]) + + cf.write(f, tmpfile) + + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile), 1) + + g = cf.read(tmpfile) + self.assertEqual(len(g), len(f)) + + cf.write(g, tmpfile1) + + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile1), 1) + + h = cf.read(tmpfile1) + self.assertEqual(len(h), len(g)) + self.assertTrue(h[0].equals(g[0])) + + def test_read_write_UGRID_domain(self): + """Test the cf.read and cf.write with UGRID domains.""" + # Face, edge, and point fields/domains that are all part of + # the same UGRID mesh + ugrid = [f.domain for f in cf.example_fields(8, 9, 10)] + + face, edge, point = (0, 1, 2) + + # Test for equality with the fields defined in memory. Only + # works for face and edge domains. + for cell in (face, edge): + d = ugrid[cell] + cf.write(d, tmpfile) + e = cf.read(tmpfile, domain=True) + self.assertEqual(len(e), 2) + self.assertTrue(e[0].equals(d)) + self.assertEqual(e[1].domain_topology().get_cell(), "point") + + # Test round-tripping fields with all three domains + # + # combinations = [(0, 1, 2), (0, 2, 1), ..., (2, 0, 1), (2, 1, 0)] + combinations = list(itertools.permutations([face, edge, point], 3)) + for cells in combinations: + d = [] + for cell in cells: + d.append(ugrid[cell]) + + cf.write(d, tmpfile) + + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile), 1) + + e = cf.read(tmpfile, domain=True) + + self.assertEqual(len(e), len(d)) + + cf.write(e, tmpfile1) + + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile1), 1) + + f = cf.read(tmpfile1, domain=True) + self.assertEqual(len(f), len(e)) + for i, j in zip(f, e): + self.assertTrue(i.equals(j)) + + # Note: Other combintations of domain read/write are tricky, + # because the mesh variable *and* the domain variable in + # the dataset *both* define domains. Let's not worry + # about that now! if __name__ == "__main__": diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 93d58c1d2e..b7bae5fd1f 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -346,9 +346,6 @@ def test_write_netcdf_mode(self): if fmt == "NETCDF4_CLASSIC" and ex_field_n in (6, 7): continue - print( - "TODOUGRID: excluding example fields 8, 9, 10 until writing UGRID is enabled" - ) if ex_field_n in (8, 9, 10): continue @@ -420,9 +417,7 @@ def test_write_netcdf_mode(self): # Now do the same test, but appending all of the example fields in # one operation rather than one at a time, to check that it works. cf.write(g, tmpfile, fmt=fmt, mode="w") # 1. overwrite to wipe - print( - "TODOUGRID: excluding example fields 8, 9, 10 until writing UGRID is enabled" - ) + append_ex_fields = cf.example_fields(0, 1, 2, 3, 4, 5, 6, 7) del append_ex_fields[1] # note: can remove after Issue #141 closed if fmt in "NETCDF4_CLASSIC": From c05bc58404bde1e1136646e9de1612ededa24a19 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 29 Oct 2025 12:45:23 +0000 Subject: [PATCH 2/6] ugrid_3.nc --- cf/test/create_test_files.py | 145 +++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/cf/test/create_test_files.py b/cf/test/create_test_files.py index e2b6cf6f48..604f38c787 100644 --- a/cf/test/create_test_files.py +++ b/cf/test/create_test_files.py @@ -2228,6 +2228,150 @@ def _make_ugrid_2(filename): return filename +def _make_ugrid_3(filename): + """Create a UGRID mesh topology and no fields/domains.""" + n = netCDF4.Dataset(filename, "w") + + n.Conventions = f"CF-{VN}" + + n.createDimension("nMesh3_node", 7) + n.createDimension("nMesh3_edge", 9) + n.createDimension("nMesh3_face", 3) + n.createDimension("connectivity2", 2) + n.createDimension("connectivity4", 4) + n.createDimension("connectivity5", 5) + + Mesh3 = n.createVariable("Mesh3", "i4", ()) + Mesh3.cf_role = "mesh_topology" + Mesh3.topology_dimension = 2 + Mesh3.node_coordinates = "Mesh3_node_x Mesh3_node_y" + Mesh3.face_node_connectivity = "Mesh3_face_nodes" + Mesh3.edge_node_connectivity = "Mesh3_edge_nodes" + Mesh3.face_dimension = "nMesh3_face" + Mesh3.edge_dimension = "nMesh3_edge" + Mesh3.face_face_connectivity = "Mesh3_face_links" + Mesh3.edge_edge_connectivity = "Mesh3_edge_links" + + # Node + Mesh3_node_x = n.createVariable("Mesh3_node_x", "f4", ("nMesh3_node",)) + Mesh3_node_x.standard_name = "longitude" + Mesh3_node_x.units = "degrees_east" + Mesh3_node_x[...] = [-45, -43, -45, -43, -45, -43, -40] + + Mesh3_node_y = n.createVariable("Mesh3_node_y", "f4", ("nMesh3_node",)) + Mesh3_node_y.standard_name = "latitude" + Mesh3_node_y.units = "degrees_north" + Mesh3_node_y[...] = [35, 35, 33, 33, 31, 31, 34] + + Mesh3_edge_nodes = n.createVariable( + "Mesh3_edge_nodes", "i4", ("nMesh3_edge", "connectivity2") + ) + Mesh3_edge_nodes.long_name = "Maps every edge to its two nodes" + Mesh3_edge_nodes[...] = [ + [1, 6], + [3, 6], + [3, 1], + [0, 1], + [2, 0], + [2, 3], + [2, 4], + [5, 4], + [3, 5], + ] + + # Face + Mesh3_face_x = n.createVariable( + "Mesh3_face_x", "f8", ("nMesh3_face",), fill_value=-99 + ) + Mesh3_face_x.standard_name = "longitude" + Mesh3_face_x.units = "degrees_east" + Mesh3_face_x[...] = [-44, -44, -42] + + Mesh3_face_y = n.createVariable( + "Mesh3_face_y", "f8", ("nMesh3_face",), fill_value=-99 + ) + Mesh3_face_y.standard_name = "latitude" + Mesh3_face_y.units = "degrees_north" + Mesh3_face_y[...] = [34, 32, 34] + + Mesh3_face_nodes = n.createVariable( + "Mesh3_face_nodes", + "i4", + ("nMesh3_face", "connectivity4"), + fill_value=-99, + ) + Mesh3_face_nodes.long_name = "Maps every face to its corner nodes" + Mesh3_face_nodes[...] = [[2, 3, 1, 0], [4, 5, 3, 2], [6, 1, 3, -99]] + + Mesh3_face_links = n.createVariable( + "Mesh3_face_links", + "i4", + ("nMesh3_face", "connectivity4"), + fill_value=-99, + ) + Mesh3_face_links.long_name = "neighbour faces for faces" + Mesh3_face_links[...] = [ + [1, 2, -99, -99], + [0, -99, -99, -99], + [0, -99, -99, -99], + ] + + # Edge + Mesh3_edge_x = n.createVariable( + "Mesh3_edge_x", "f8", ("nMesh3_edge",), fill_value=-99 + ) + Mesh3_edge_x.standard_name = "longitude" + Mesh3_edge_x.units = "degrees_east" + Mesh3_edge_x[...] = [-41.5, -41.5, -43, -44, -45, -44, -45, -44, -43] + + Mesh3_edge_y = n.createVariable( + "Mesh3_edge_y", "f8", ("nMesh3_edge",), fill_value=-99 + ) + Mesh3_edge_y.standard_name = "latitude" + Mesh3_edge_y.units = "degrees_north" + Mesh3_edge_y[...] = [34.5, 33.5, 34, 35, 34, 33, 32, 31, 32] + + Mesh3_edge_links = n.createVariable( + "Mesh3_edge_links", + "i4", + ("nMesh3_edge", "connectivity5"), + fill_value=-99, + ) + Mesh3_edge_links.long_name = "neighbour edges for edges" + Mesh3_edge_links[...] = [ + [1, 2, 3, -99, -99], + [0, 2, 5, 8, -99], + [3, 0, 1, 5, 8], + [4, 2, 0, -99, -99], + [ + 3, + 5, + 6, + -99, + -99, + ], + [4, 6, 2, 1, 8], + [ + 4, + 5, + 7, + -99, + -99, + ], + [ + 6, + 8, + -99, + -99, + -99, + ], + [7, 5, 2, 1, -99], + ] + + n.close() + return filename + + def _make_aggregation_value(filename): """Create an aggregation variable with 'unique_values'.""" n = netCDF4.Dataset(filename, "w") @@ -2341,6 +2485,7 @@ def _make_aggregation_value(filename): ugrid_1 = _make_ugrid_1("ugrid_1.nc") ugrid_2 = _make_ugrid_2("ugrid_2.nc") +ugrid_3 = _make_ugrid_3("ugrid_3.nc") aggregation_value = _make_aggregation_value("aggregation_value.nc") From ed45ae8af8a1a1015537eabdf8ef25d00617427d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 29 Oct 2025 13:36:48 +0000 Subject: [PATCH 3/6] dev --- cf/test/test_UGRID.py | 86 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/cf/test/test_UGRID.py b/cf/test/test_UGRID.py index 103faa78e8..848da10593 100644 --- a/cf/test/test_UGRID.py +++ b/cf/test/test_UGRID.py @@ -52,6 +52,15 @@ def n_mesh_variables(filename): return n +def combinations(face, edge, point): + """Return combinations for field/domain indexing.""" + return [ + i + for n in range(1, 4) + for i in itertools.permutations([face, edge, point], n) + ] + + class UGRIDTest(unittest.TestCase): """Test UGRID field constructs.""" @@ -63,6 +72,10 @@ class UGRIDTest(unittest.TestCase): os.path.dirname(os.path.abspath(__file__)), "ugrid_2.nc" ) + filename3 = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_3.nc" + ) + def setUp(self): """Preparations called immediately before each test method.""" # Disable log messages to silence expected warnings @@ -189,7 +202,6 @@ def test_read_write_UGRID_field(self): # Face, edge, and point fields that are all part of the same # UGRID mesh ugrid = cf.example_fields(8, 9, 10) - face, edge, point = (0, 1, 2) tmpfile = "tmpfileu.nc" @@ -202,19 +214,8 @@ def test_read_write_UGRID_field(self): self.assertEqual(len(g), 1) self.assertTrue(g[0].equals(f)) - # Test round-tripping fields with multiple fields - # - # Get the indices of 'ugrid' for all possible combinations of - # fields: - # - # combinations = [(0,), (1,), ..., (2, 0, 1), (2, 1, 0)] - combinations = [ - i - for n in range(1, 4) - for i in itertools.permutations([face, edge, point], n) - ] - - for cells in combinations: + # Test round-tripping of field combinations + for cells in combinations(face, edge, point): f = [] for cell in cells: f.append(ugrid[cell]) @@ -241,7 +242,6 @@ def test_read_write_UGRID_domain(self): # Face, edge, and point fields/domains that are all part of # the same UGRID mesh ugrid = [f.domain for f in cf.example_fields(8, 9, 10)] - face, edge, point = (0, 1, 2) # Test for equality with the fields defined in memory. Only @@ -254,38 +254,46 @@ def test_read_write_UGRID_domain(self): self.assertTrue(e[0].equals(d)) self.assertEqual(e[1].domain_topology().get_cell(), "point") - # Test round-tripping fields with all three domains - # - # combinations = [(0, 1, 2), (0, 2, 1), ..., (2, 0, 1), (2, 1, 0)] - combinations = list(itertools.permutations([face, edge, point], 3)) - for cells in combinations: - d = [] - for cell in cells: - d.append(ugrid[cell]) + # Test round-tripping of domain combinations for the + # example_field domains, and also the domain read from + # 'ugrid_3.nc'. + for iteration in ("memory", "file"): + for cells in combinations(face, edge, point): + d = [] + for cell in cells: + d.append(ugrid[cell]) - cf.write(d, tmpfile) + if point not in cells: + # When we write a non-point domains, we also get + # the point locations. + d.append(ugrid[point]) + elif cells == (point,): + # When we write a point domain on its own, we also + # get the edge location. + d.append(ugrid[edge]) - # Check that there's only one mesh variable in the file - self.assertEqual(n_mesh_variables(tmpfile), 1) + cf.write(d, tmpfile) - e = cf.read(tmpfile, domain=True) + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile), 1) - self.assertEqual(len(e), len(d)) + e = cf.read(tmpfile, domain=True) - cf.write(e, tmpfile1) + self.assertEqual(len(e), len(d)) - # Check that there's only one mesh variable in the file - self.assertEqual(n_mesh_variables(tmpfile1), 1) + cf.write(e, tmpfile1) + + # Check that there's only one mesh variable in the file + self.assertEqual(n_mesh_variables(tmpfile1), 1) - f = cf.read(tmpfile1, domain=True) - self.assertEqual(len(f), len(e)) - for i, j in zip(f, e): - self.assertTrue(i.equals(j)) + f = cf.read(tmpfile1, domain=True) + self.assertEqual(len(f), len(e)) + for i, j in zip(f, e): + self.assertTrue(i.equals(j)) - # Note: Other combintations of domain read/write are tricky, - # because the mesh variable *and* the domain variable in - # the dataset *both* define domains. Let's not worry - # about that now! + # Set up for the 'file' iteration + ugrid = cf.read(self.filename3, domain=True) + face, edge, point = (2, 1, 0) if __name__ == "__main__": From 020c2ddbce56dc4aa87b4e9a752db22374a6a1ba Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 17 Nov 2025 15:06:54 +0000 Subject: [PATCH 4/6] dev --- cf/test/test_read_write.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index b7bae5fd1f..23ae7956e4 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -346,6 +346,8 @@ def test_write_netcdf_mode(self): if fmt == "NETCDF4_CLASSIC" and ex_field_n in (6, 7): continue + # Exclude UGRID fields, as we deal with them in + # test_UGRID.py if ex_field_n in (8, 9, 10): continue @@ -418,6 +420,8 @@ def test_write_netcdf_mode(self): # one operation rather than one at a time, to check that it works. cf.write(g, tmpfile, fmt=fmt, mode="w") # 1. overwrite to wipe + # Exclude UGRID fields, as we deal with them in + # test_UGRID.py append_ex_fields = cf.example_fields(0, 1, 2, 3, 4, 5, 6, 7) del append_ex_fields[1] # note: can remove after Issue #141 closed if fmt in "NETCDF4_CLASSIC": From 3f0ce97452ca43f3ed5e470347d53a43ba25d43b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 24 Mar 2026 14:26:31 +0000 Subject: [PATCH 5/6] dev --- Changelog.rst | 4 ++ cf/read_write/read.py | 7 ++- cf/test/example_field_0.kerchunk | 1 + cf/test/test_kerchunk.py | 99 ++++++++++++++++++++++++++++++++ docs/source/installation.rst | 3 + docs/source/introduction.rst | 4 +- docs/source/tutorial.rst | 6 +- setup.py | 2 +- 8 files changed, 117 insertions(+), 9 deletions(-) create mode 100644 cf/test/example_field_0.kerchunk create mode 100644 cf/test/test_kerchunk.py diff --git a/Changelog.rst b/Changelog.rst index 674bd687f7..b545cf2be8 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,10 @@ Version NEXTVERSION **2026-??-??** +* Read Kerchunk datasets with `cf.read` + (https://github.com/NCAS-CMS/cf-python/936) +* Read open file handle datasets with `cf.read` + (https://github.com/NCAS-CMS/cf-python/issues/937) * New keyword to `cf.read`: ``filesystem`` (https://github.com/NCAS-CMS/cf-python/issues/931) * New keyword parameter to `cf.Data.compute`: ``persist`` diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 614f407f2f..22732550cf 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -20,8 +20,8 @@ class read(cfdm.read): """Read field or domain constructs from files. - The following file formats are supported: netCDF, CDL, Zarr, PP, - and UM fields file. + The following file formats are supported: netCDF, CDL, Zarr, + Kerchunk, PP, and UM fields file. NetCDF and Zarr datasets may be on local disk, on an OPeNDAP server, or in an S3 object store. @@ -144,7 +144,7 @@ class read(cfdm.read): :Parameters: - {{read datasets: (arbitrarily nested sequence of) `str`}} + {{read datasets:}} {{read recursive: `bool`, optional}} @@ -162,6 +162,7 @@ class read(cfdm.read): ``'netCDF'`` A netCDF-3 or netCDF-4 dataset ``'CDL'`` A text CDL file of a netCDF dataset ``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 dataset + ``'Kerchunk'`` A Kerchunked dataset ``'UM'`` A UM fields file or PP dataset ============== ========================================== diff --git a/cf/test/example_field_0.kerchunk b/cf/test/example_field_0.kerchunk new file mode 100644 index 0000000000..4f8678854c --- /dev/null +++ b/cf/test/example_field_0.kerchunk @@ -0,0 +1 @@ +{"version":1,"refs":{"lat\/0":"base64:eF5jYMABDjQwNBwIcmNwCzpwgMHBAQAxqAWx","lat_bnds\/0.0":"base64:eF5jYCAVNMAZYX5+dkDg5xd2AAQcQAAAZ+II3Q==","lon\/0":"base64:eF5jYCAONDxQ2BDwwUPCLCAmOTOvqNQBCgB9ngjU","lon_bnds\/0.0":"base64:eF5jYKAyaACCBw8aGhQUHjzYsKGBwc0tLCwhIS0tJ6egoLi4jMEBFQAA\/noSOQ==","q\/0.0":["example_field_0.nc",17755,100],"q\/0.1":"base64:eF6z7v4lGfX4l4\/QxJO77ufUsaQsrTNvflvm6f88K8x6dlbngZp9PL2\/9\/DdP7LH8ZT9fKbpK0t0\/RVKEhaaSmnnfZy8qXfD5KeucfrtkxbNmLt8U8WGbbvWttkjAQAjCS4Q","q\/1.0":"base64:eF6TzNB+88v69hsGIGCZ2B5Yd\/9nIIgd9lZsR5Z\/3Q4Qm6\/mptye3iw5EFvX\/lxryfQ9rSD2JtPvryfnlbwGseeuW7yxYtHkhfOAwB4K3IEAAKyvI6g=","q\/1.1":"base64:eF6TtLaeY909x5IBCFju3z9yP+fIFhA7zN9fwf+5wgkQm6+3d0Pv7w1lILbu9OlF01cW7QOxN+XlZeR9zJgPYq\/d1DRx0qKFXfOAwB4K3IEAAGAXIr4=","time\/0":"\u0000\u0000\u0000\u0000\u0000\u0000?@",".zgroup":"{\"zarr_format\":2}",".zattrs":"{\"Conventions\":\"CF-1.12\"}","lat\/.zarray":"{\"shape\":[5],\"chunks\":[5],\"dtype\":\" + # cf.log_level('DISABLE') + + def test_kerchunk_read(self): + """Test cf.read with Kerchunk.""" + f = cf.read(self.netcdf)[0] + + k = cf.read(self.kerchunk, dask_chunks=3) + self.assertEqual(len(k), 1) + self.assertTrue(k[0].equals(f)) + self.assertGreater(k[0].data.npartitions, 1) + + k = cf.read([self.kerchunk, self.kerchunk], dask_chunks=3) + self.assertEqual(len(k), 2) + self.assertTrue(k[0].equals(k[-1])) + + k = cf.read([self.kerchunk, self.kerchunk, self.netcdf], dask_chunks=3) + self.assertEqual(len(k), 3) + self.assertTrue(k[0].equals(k[-1])) + self.assertTrue(k[1].equals(k[-1])) + + def test_kerchunk_original_filenames(self): + """Test original_filenames with Kerchunk.""" + k = cf.read(self.kerchunk)[0] + self.assertEqual(k.get_original_filenames(), set()) + + def test_read_dict(self): + """Test cf.read with an Kerchunk dictionary.""" + with open(kerchunk_file, "r") as fh: + d = json.load(fh) + + with self.assertRaises(ValueError): + cf.read(d) + + fs = fsspec.filesystem("reference", fo=d) + kerchunk = fs.get_mapper() + self.assertEqual(len(cf.read(kerchunk)), 1) + + def test_read_bytes(self): + """Test cf.read with an Kerchunk dictionary.""" + with open(kerchunk_file, "r") as fh: + d = json.load(fh) + + b = json.dumps(d).encode("utf-8") + with self.assertRaises(ValueError): + cf.read(b) + + d = json.loads(b) + fs = fsspec.filesystem("reference", fo=d) + kerchunk = fs.get_mapper() + self.assertEqual(len(cf.read(kerchunk)), 1) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print("") + unittest.main(verbosity=2) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 7b07eb4de4..aa42495caa 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -225,6 +225,9 @@ Required * `scipy `_, version 1.10.0 or newer. +* `fsspec `_, version 2026.2.0 or + newer. + * `cfdm `_, version 1.13.1.0 or up to, but not including, 1.13.2.0. diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index d1bbfc4dd7..baa4fc62ae 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -67,8 +67,8 @@ may nonetheless be modified in memory. The `cf` package can: * read :term:`field constructs ` and :term:`domain - constructs ` from netCDF, CDL, Zarr, PP and UM - datasets with a choice of netCDF backends, + constructs ` from netCDF, CDL, Zarr, Kerchunk, PP + and UM datasets with a choice of netCDF backends, * read files from OPeNDAP servers and S3 object stores, diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index a696524e8f..6e9ff6697e 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -139,15 +139,15 @@ The following file types can be read: .. -* `CFA-netCDF - `_ - files at version 0.6 or later. +* Datasets in `Kerchunk `_ format. .. * :ref:`PP and UM fields files `, whose contents are mapped into field constructs. +.. + Note that when reading netCDF4 files that contain :ref:`hierachical groups `, the group structure is saved via the :ref:`netCDF interface ` so that it may be re-used, diff --git a/setup.py b/setup.py index 838cd438c5..6bf2e5d0fd 100755 --- a/setup.py +++ b/setup.py @@ -178,7 +178,7 @@ def compile(): The ``cf`` package can: -* read field and domain constructs from netCDF, CDL, Zarr, PP and UM datasets, +* read field and domain constructs from netCDF, CDL, Zarr, Kerchunk, PP and UM datasets, * be fully flexible with respect to dataset storage chunking, From 6a26cd7138635329212fbe0cc6789e4cd0007870 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 24 Mar 2026 20:59:54 +0000 Subject: [PATCH 6/6] dev --- cf/read_write/um/umread.py | 42 ++++++++++++++++++++++++++++++++++++++ cf/test/test_pp.py | 9 ++++++++ 2 files changed, 51 insertions(+) diff --git a/cf/read_write/um/umread.py b/cf/read_write/um/umread.py index 049b538e9b..4a109abced 100644 --- a/cf/read_write/um/umread.py +++ b/cf/read_write/um/umread.py @@ -3549,6 +3549,13 @@ def read( "(only Field constructs)" ) + representation = self.dataset_representation(dataset) + if representation != "path": + raise NotImplementedError( + "Can't yet read Field constructs from a UM or PP " + f"{representation!r} dataset: {dataset!r}" + ) + if not _stash2standard_name: # -------------------------------------------------------- # Create the STASH code to standard_name conversion @@ -3835,6 +3842,41 @@ def dataset_open(self, filename, parse=True): parse=parse, ) + @classmethod + def dataset_representation(cls, dataset): + """Return the logical representation type of the input dataset. + + .. versionadded:: NEXTVERSION + + :Parameters: + + dataset: + The dataset. May be a string-valued path or a + file-like object. + + :Returns: + + `str` + The dataset representation: + + * ``'path'``: A string-valued path. + + * ``'file_handle'``: An open file handle (such as + returned by `fsspec.filesystem.open`) + + * ``'unknown'``: Anything else. + + """ + # Strings (Paths) + if isinstance(dataset, str): + return "path" + + # Check for a "binary stream" (file handle) + if hasattr(dataset, "read") and hasattr(dataset, "seek"): + return "file_handle" + + return "unknown" + """ Problems: diff --git a/cf/test/test_pp.py b/cf/test/test_pp.py index 08a85a4eef..d5812deca8 100644 --- a/cf/test/test_pp.py +++ b/cf/test/test_pp.py @@ -145,6 +145,15 @@ def test_PP_um_version(self): f = cf.read(self.ppfile, um={"version": "6.6.3"})[0] self.assertEqual(f.get_property("um_version"), "6.6.3") + def test_PP_file_object(self): + # Can't yet read PP/UM from file-like objects + with open(self.ppfile, "rb") as fh: + with self.assertRaises(NotImplementedError): + cf.read(fh) + + # Check that the file has been rewound + self.assertEqual(fh.tell(), 0) + if __name__ == "__main__": print("Run date:", datetime.datetime.now())