diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index 443ad98..e3b1361 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -1,12 +1,11 @@ -import asyncio from typing import TYPE_CHECKING import numpy as np +import zarr from xarray import Dataset from xarray.core.variable import Variable -import zarr -from virtualizarr.manifests import ManifestArray, ChunkManifest +from virtualizarr.manifests import ChunkManifest, ManifestArray if TYPE_CHECKING: from icechunk import IcechunkStore @@ -28,13 +27,16 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None: if not isinstance(store, IcechunkStore): raise TypeError(f"expected type IcechunkStore, but got type {type(store)}") - # TODO write group metadata + # TODO only supports writing to the root group currently + root_group = zarr.group(store=store, overwrite=True) + + # TODO this is Frozen, the API for setting attributes must be something else + # root_group.attrs = ds.attrs for name, var in ds.variables.items(): write_variable_to_icechunk( store=store, - # TODO is this right? - group="root", + group=root_group, name=name, var=var, ) @@ -44,7 +46,7 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None: def write_variable_to_icechunk( store: "IcechunkStore", - group: str, + group: zarr.Group, name: str, var: Variable, ) -> None: @@ -55,25 +57,28 @@ def write_variable_to_icechunk( ma = var.data zarray = ma.zarray - # TODO how do we set the other zarr attributes? i.e. the .zarray information? - # Probably need to manually create the groups and arrays in the store... - # Would that just be re-implementing xarray's `.to_zarr()` though? - array = zarr.Array.create(store, shape=zarray.shape, chunk_shape=zarray.chunks, dtype=zarray.dtype) - + # TODO when I try to create this array I get an AssertionError from inside Zarr v3 + # TODO do I need this array object for anything after ensuring the array has been created? + # array = group.create_array( + # store, + # shape=zarray.shape, + # chunk_shape=zarray.chunks, + # dtype=zarray.dtype, + # ) # TODO we also need to set zarr attributes, including DIMENSION_NAMES write_manifest_virtual_refs( - store=store, - group=group, - name=name, + store=store, + group=group, + name=name, manifest=ma.manifest, ) def write_manifest_virtual_refs( - store: "IcechunkStore", - group: str, + store: "IcechunkStore", + group: zarr.Group, name: str, manifest: ChunkManifest, ) -> None: @@ -84,10 +89,14 @@ def write_manifest_virtual_refs( # but Icechunk need to expose a suitable API first it = np.nditer( [manifest._paths, manifest._offsets, manifest._lengths], - flags=["refs_ok", "multi_index", "c_index"], # TODO is "c_index" correct? what's the convention for zarr chunk keys? - op_flags=[['readonly']] * 3 + flags=[ + "refs_ok", + "multi_index", + "c_index", + ], # TODO is "c_index" correct? what's the convention for zarr chunk keys? + op_flags=[["readonly"]] * 3, ) - for (path, offset, length) in it: + for path, offset, length in it: index = it.multi_index chunk_key = "/".join(str(i) for i in index)