Skip to content
This repository has been archived by the owner on Nov 12, 2024. It is now read-only.

Commit

Permalink
try creating zarr group and arrays explicitly
Browse files Browse the repository at this point in the history
  • Loading branch information
TomNicholas committed Sep 27, 2024
1 parent 93080b3 commit bebf370
Showing 1 changed file with 29 additions and 20 deletions.
49 changes: 29 additions & 20 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import asyncio
from typing import TYPE_CHECKING

import numpy as np
import zarr
from xarray import Dataset
from xarray.core.variable import Variable
import zarr

from virtualizarr.manifests import ManifestArray, ChunkManifest
from virtualizarr.manifests import ChunkManifest, ManifestArray

if TYPE_CHECKING:
from icechunk import IcechunkStore
Expand All @@ -28,13 +27,16 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:
if not isinstance(store, IcechunkStore):
raise TypeError(f"expected type IcechunkStore, but got type {type(store)}")

# TODO write group metadata
# TODO only supports writing to the root group currently
root_group = zarr.group(store=store, overwrite=True)

# TODO this is Frozen, the API for setting attributes must be something else
# root_group.attrs = ds.attrs

for name, var in ds.variables.items():
write_variable_to_icechunk(
store=store,
# TODO is this right?
group="root",
group=root_group,
name=name,
var=var,
)
Expand All @@ -44,7 +46,7 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:

def write_variable_to_icechunk(
store: "IcechunkStore",
group: str,
group: zarr.Group,
name: str,
var: Variable,
) -> None:
Expand All @@ -55,25 +57,28 @@ def write_variable_to_icechunk(
ma = var.data
zarray = ma.zarray

# TODO how do we set the other zarr attributes? i.e. the .zarray information?
# Probably need to manually create the groups and arrays in the store...
# Would that just be re-implementing xarray's `.to_zarr()` though?
array = zarr.Array.create(store, shape=zarray.shape, chunk_shape=zarray.chunks, dtype=zarray.dtype)

# TODO when I try to create this array I get an AssertionError from inside Zarr v3
# TODO do I need this array object for anything after ensuring the array has been created?
# array = group.create_array(
# store,
# shape=zarray.shape,
# chunk_shape=zarray.chunks,
# dtype=zarray.dtype,
# )

# TODO we also need to set zarr attributes, including DIMENSION_NAMES

write_manifest_virtual_refs(
store=store,
group=group,
name=name,
store=store,
group=group,
name=name,
manifest=ma.manifest,
)


def write_manifest_virtual_refs(
store: "IcechunkStore",
group: str,
store: "IcechunkStore",
group: zarr.Group,
name: str,
manifest: ChunkManifest,
) -> None:
Expand All @@ -84,10 +89,14 @@ def write_manifest_virtual_refs(
# but Icechunk needs to expose a suitable API first
it = np.nditer(
[manifest._paths, manifest._offsets, manifest._lengths],
flags=["refs_ok", "multi_index", "c_index"], # TODO is "c_index" correct? what's the convention for zarr chunk keys?
op_flags=[['readonly']] * 3
flags=[
"refs_ok",
"multi_index",
"c_index",
], # TODO is "c_index" correct? what's the convention for zarr chunk keys?
op_flags=[["readonly"]] * 3,
)
for (path, offset, length) in it:
for path, offset, length in it:
index = it.multi_index
chunk_key = "/".join(str(i) for i in index)

Expand Down

0 comments on commit bebf370

Please sign in to comment.