From 90da488351b27a94043c4d457487c55870433462 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:01:50 -0400 Subject: [PATCH 1/3] fix: validate that dask-style chunks have regular shapes --- src/zarr/core/chunk_grids.py | 13 ++++++++----- tests/test_chunk_grids.py | 9 +++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 2c7945fa64..3171c39904 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -126,11 +126,14 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl chunks = tuple(int(chunks) for _ in shape) # handle dask-style chunks (iterable of iterables) - if all(isinstance(c, (tuple | list)) for c in chunks): - # take first chunk size for each dimension - chunks = tuple( - c[0] for c in chunks - ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) + if all(isinstance(c, (tuple, list)) for c in chunks): + for i, c in enumerate(chunks): + if len(set(c[:-1])) > 1 or (len(c) > 1 and c[-1] > c[0]): + raise ValueError( + f"Irregular chunk sizes in dimension {i}: {tuple(c)}. " + "Only uniform chunks (with an optional smaller final chunk) are supported." + ) + chunks = tuple(c[0] for c in chunks) # handle bad dimensionality if len(chunks) > len(shape): diff --git a/tests/test_chunk_grids.py b/tests/test_chunk_grids.py index 4c69c483ae..2920b5d6f3 100644 --- a/tests/test_chunk_grids.py +++ b/tests/test_chunk_grids.py @@ -35,6 +35,10 @@ def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), + # dask-style chunks (uniform with optional smaller final chunk) + (((100, 100, 100), (50, 50)), (300, 100), 1, (100, 50)), + (((100, 100, 50),), (250,), 1, (100,)), + (((100,),), (100,), 1, (100,)), # auto chunking (None, (100,), 1, (100,)), (-1, (100,), 1, (100,)), @@ -52,3 +56,8 @@ def test_normalize_chunks_errors() -> None: normalize_chunks("foo", (100,), 1) with pytest.raises(ValueError): normalize_chunks((100, 10), (100,), 1) + # dask-style irregular chunks should raise + with pytest.raises(ValueError, match="Irregular chunk sizes"): + normalize_chunks(((10, 20, 30),), (60,), 1) + with pytest.raises(ValueError, match="Irregular chunk sizes"): + normalize_chunks(((100, 100), (10, 20)), (200, 30), 1) From 293977e1a46005e530d78013fa7d5d116044a4a1 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:16:29 -0400 Subject: [PATCH 2/3] Apply suggestion from @dcherian Co-authored-by: Deepak Cherian --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 3171c39904..25e37e30bf 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -128,7 +128,7 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl # handle dask-style chunks (iterable of iterables) if all(isinstance(c, (tuple, list)) for c in chunks): for i, c in enumerate(chunks): - if len(set(c[:-1])) > 1 or (len(c) > 1 and c[-1] > c[0]): + if itertools.pairwise(c[:-2], c[1:-1]).map(lambda x, y: x == y).any()) or (len(c) > 1 and c[-1] > c[0]): raise ValueError( f"Irregular chunk sizes in dimension {i}: {tuple(c)}. " "Only uniform chunks (with an optional smaller final chunk) are supported." From 9ad42a6e4a5fe55e3b470d3af7410da0018bfce5 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 13 Mar 2026 15:19:58 -0400 Subject: [PATCH 3/3] Fix shortcircuit option --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 25e37e30bf..c903eba013 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -128,7 +128,7 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl # handle dask-style chunks (iterable of iterables) if all(isinstance(c, (tuple, list)) for c in chunks): for i, c in enumerate(chunks): - if itertools.pairwise(c[:-2], c[1:-1]).map(lambda x, y: x == y).any()) or (len(c) > 1 and c[-1] > c[0]): + if any(x != y for x, y in itertools.pairwise(c[:-1])) or (len(c) > 1 and c[-1] > c[0]): raise ValueError( f"Irregular chunk sizes in dimension {i}: {tuple(c)}. " "Only uniform chunks (with an optional smaller final chunk) are supported."