diff --git a/.github/workflows/python-module.yml b/.github/workflows/python-module.yml index 7d43dc9..de951c6 100644 --- a/.github/workflows/python-module.yml +++ b/.github/workflows/python-module.yml @@ -128,12 +128,12 @@ jobs: cd python python -c "import wkw" pip install pytest - pytest tests + pytest tests -k "not big_read" - name: Test (non-bash) run: | cd python python -c "import wkw" - pytest tests + pytest tests -k "not big_read" - name: Publish shell: bash if: startsWith(github.event.ref, 'refs/tags') diff --git a/python/tests/test_wkw.py b/python/tests/test_wkw.py index 6133b0a..65896c0 100644 --- a/python/tests/test_wkw.py +++ b/python/tests/test_wkw.py @@ -248,6 +248,63 @@ def test_view_on_np_array(): assert np.all(data == read_data) +def test_not_too_much_data_is_written(): + def write_and_test_in_given_order(wkw_path, order): + data_shape = (35, 35, 35) + data = generate_test_data(np.uint8, data_shape, order=order) + with wkw.Dataset.create(wkw_path, wkw.Header(np.uint8)) as dataset: + dataset.write((0, 0, 0), np.ones((35, 35, 64), dtype=np.uint8)) + dataset.write((1, 2, 3), data) + + read_data = dataset.read((1, 2, 3), (35, 35, 35)) + before = dataset.read((0, 0, 0), (1, 2, 3)) + after = dataset.read((0, 0, 38), (35, 35, 26)) + + assert np.all(data == read_data) + assert np.all(before == 1) + assert np.all(after == 1) + + write_and_test_in_given_order("tests/tmp", "F") + write_and_test_in_given_order("tests/tmp2", "C") + + +def test_multiple_writes_and_reads(): + + mem_buffer = np.zeros((200, 200, 200), dtype=np.uint8, order="F") + with wkw.Dataset.create("tests/tmp", wkw.Header(np.uint8)) as dataset: + for i in range(10): + offset = np.random.randint(100, size=(3)) + size = np.random.randint(1, 100, size=(3)) + order = np.random.choice(["F", "C"]) + data = generate_test_data(np.uint8, [1] + list(size), order) + dataset.write(offset, data) + mem_buffer[ + offset[0] : offset[0] + size[0], + offset[1] : offset[1] + size[1], + offset[2] : offset[2] + size[2], + ] = data + + read_data = dataset.read((0, 0, 0), (200, 200, 200)) + assert np.all(mem_buffer == read_data) + + +def test_big_read(): + data = np.ones((10, 10, 764), order="C", dtype=np.uint8) + offset = np.array([0, 0, 640]) + bottom = (2000, 2000, 2000) + mem_buffer = np.zeros(bottom, dtype=np.uint8, order="F") + + with wkw.Dataset.create("tests/tmp", wkw.Header(np.uint8)) as dataset: + dataset.write(offset, data) + mem_buffer[ + offset[0] : offset[0] + data.shape[0], + offset[1] : offset[1] + data.shape[1], + offset[2] : offset[2] + data.shape[2], + ] = data + read_data = dataset.read((0, 0, 0), bottom) + assert np.all(read_data == mem_buffer) + + def generate_test_data(dtype, size=SIZE, order="C"): return np.array( np.random.uniform(np.iinfo(dtype).min, np.iinfo(dtype).max, size).astype(dtype), @@ -262,6 +319,10 @@ def try_rmtree(dir): pass +def setup_function(): + np.random.seed(0) + + def teardown_function(): try_rmtree("tests/tmp") try_rmtree("tests/tmp2") diff --git a/rust/src/file.rs b/rust/src/file.rs index c5b7a33..f693996 100644 --- a/rust/src/file.rs +++ b/rust/src/file.rs @@ -148,17 +148,17 @@ impl File { Vec3::from(1u32 << block_len_log2), self.header.voxel_size as usize, self.header.voxel_type, - src_mat.data_in_c_order, + false, )?; // build second buffer - let mut raw_disk_block_buf = vec![0u8; self.header.block_size()]; - let mut raw_disk_block_buf_mat = Mat::new( - raw_disk_block_buf.as_mut_slice(), + let mut c_to_fortran_buf = vec![0u8; self.header.block_size()]; + let mut c_to_fortran_buf_mat = Mat::new( + c_to_fortran_buf.as_mut_slice(), Vec3::from(1u32 << block_len_log2), self.header.voxel_size as usize, self.header.voxel_type, - false, + true, )?; // build Morton-order iterator @@ -184,18 +184,17 @@ impl File { let cur_dst_pos = cur_box.min() - cur_block_box.min(); // fill / modify buffer - src_block_buf_mat.copy_from(cur_dst_pos, src_mat, cur_src_box)?; + src_block_buf_mat.copy_from_order_agnostic( + cur_dst_pos, + src_mat, + cur_src_box, + &mut c_to_fortran_buf_mat, + )?; self.seek_block(cur_block_idx)?; // write in fortran order - let buffer_to_write = if src_block_buf_mat.data_in_c_order { - src_block_buf_mat.copy_as_fortran_order(&mut raw_disk_block_buf_mat)?; - &raw_disk_block_buf_mat - } else { - &src_block_buf_mat - }; - self.write_block(buffer_to_write.as_slice())?; + self.write_block(src_block_buf_mat.as_slice())?; } if self.header.block_type == BlockType::LZ4 || self.header.block_type == BlockType::LZ4HC { diff --git a/rust/src/mat.rs b/rust/src/mat.rs index 1542e7b..36a2ef7 100644 --- a/rust/src/mat.rs +++ b/rust/src/mat.rs @@ -50,15 +50,18 @@ impl<'a> Mat<'a> { } fn offset(&self, pos: Vec3) -> usize { + // Early usize cast is necessary as overflows happen let offset_vx = if self.data_in_c_order { - pos.z + self.shape.z * (pos.y + self.shape.y * pos.x) + pos.z as usize + + self.shape.z as usize * (pos.y as usize + self.shape.y as usize * pos.x as usize) } else { - pos.x + self.shape.x * (pos.y + self.shape.y * pos.z) + pos.x as usize + + self.shape.x as usize * (pos.y as usize + self.shape.y as usize * pos.z as usize) }; - offset_vx as usize * self.voxel_size + offset_vx * self.voxel_size } - pub fn copy_as_fortran_order(&self, buffer: &mut Mat) -> Result<()> { + pub fn copy_as_fortran_order(&self, buffer: &mut Mat, src_bbox: Box3) -> Result<()> { if !self.data_in_c_order { return Err("Mat is already in fortran order"); } @@ -95,10 +98,12 @@ impl<'a> Mat<'a> { let src_ptr = self.data.as_ptr(); let dst_ptr = buffer_data.as_mut_ptr(); + let from = src_bbox.min(); + let to = src_bbox.max(); // Do continuous read in z. Last dim in Row-Major is continuous. - for x in 0usize..x_length { - for y in 0usize..y_length { - for z in 0usize..z_length { + for x in from.x as usize..to.x as usize { + for y in from.y as usize..to.y as usize { + for z in from.z as usize..to.z as usize { let row_major_index = linearize(x, y, z, &row_major_stride); let column_major_index = linearize(x, y, z, &column_major_stride); unsafe { @@ -112,6 +117,26 @@ impl<'a> Mat<'a> { Ok(()) } + pub fn copy_from_order_agnostic( + &mut self, + dst_pos: Vec3, + src: &Mat, + src_box: Box3, + intermediate_buffer: &mut Mat, + ) -> Result<()> { + if self.data_in_c_order { + return Err("copy_from_order_agnostic has to be called on a fortran order buffer."); + } + + if src.data_in_c_order { + intermediate_buffer.copy_from(dst_pos, src, src_box)?; + let dst_bbox = Box3::new(dst_pos, dst_pos + src_box.width())?; + intermediate_buffer.copy_as_fortran_order(self, dst_bbox) + } else { + self.copy_from(dst_pos, src, src_box) + } + } + pub fn copy_from(&mut self, dst_pos: Vec3, src: &Mat, src_box: Box3) -> Result<()> { // make sure that matrices are matching if self.voxel_size != src.voxel_size {