From 593d63badf806d7fb11d4160d9c3fba5bb1ee5f3 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 19:46:43 +0100 Subject: [PATCH 1/8] Optimize buffer ops Signed-off-by: Adam Gutglick --- vortex-buffer/src/alignment.rs | 33 ++++++++++++++-- vortex-buffer/src/buffer.rs | 69 +++++++++++++++++++++++++++------ vortex-buffer/src/buffer_mut.rs | 6 +-- 3 files changed, 90 insertions(+), 18 deletions(-) diff --git a/vortex-buffer/src/alignment.rs b/vortex-buffer/src/alignment.rs index bcd17a64f10..c09bb82e7cb 100644 --- a/vortex-buffer/src/alignment.rs +++ b/vortex-buffer/src/alignment.rs @@ -54,6 +54,9 @@ impl Alignment { Self::new(align_of::()) } + /// The largest valid alignment: the greatest power of 2 that fits into a `u16`. + pub const MAX: Alignment = Alignment::new(1 << 15); + /// Check if `self` alignment is a "larger" than `other` alignment. /// /// ## Example @@ -67,10 +70,32 @@ impl Alignment { /// assert!(!b.is_aligned_to(a)); /// ``` #[inline] - pub fn is_aligned_to(&self, other: Alignment) -> bool { - // Since we know alignments are powers of 2, we can compare them by checking if the number - // of trailing zeros in the binary representation of the alignment is greater or equal. - self.0.trailing_zeros() >= other.0.trailing_zeros() + pub const fn is_aligned_to(&self, other: Alignment) -> bool { + // Since both alignments are powers of 2, divisibility is equivalent to ordering. + self.0 >= other.0 + } + + /// Check if the given byte offset (or length) is a multiple of this alignment. + /// + /// ## Example + /// + /// ``` + /// use vortex_buffer::Alignment; + /// + /// let a = Alignment::new(4); + /// assert!(a.is_offset_aligned(8)); + /// assert!(!a.is_offset_aligned(2)); + /// ``` + #[inline] + pub const fn is_offset_aligned(&self, offset: usize) -> bool { + // Alignment is always a power of 2, so a mask test is equivalent to `offset % self == 0`. + offset & (self.0 - 1) == 0 + } + + /// Check if the given pointer is aligned to this alignment. + #[inline] + pub fn is_ptr_aligned(&self, ptr: *const T) -> bool { + self.is_offset_aligned(ptr.addr()) } /// Returns the log2 of the alignment. diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 3b778358577..c76c4112805 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -32,10 +32,17 @@ pub struct Buffer { pub(crate) _marker: PhantomData, } +/// Zero-length backing memory for empty buffers, aligned to [`Alignment::MAX`] so it satisfies +/// any valid alignment without allocating. +#[repr(align(32768))] +struct EmptyBacking([u8; 0]); + +static EMPTY_BACKING: EmptyBacking = EmptyBacking([]); + impl Default for Buffer { fn default() -> Self { Self { - bytes: Default::default(), + bytes: Bytes::from_static(&EMPTY_BACKING.0), length: 0, alignment: Alignment::of::(), _marker: PhantomData, @@ -101,12 +108,27 @@ impl Buffer { /// Create a new empty `ByteBuffer` with the provided alignment. pub fn empty() -> Self { - BufferMut::empty().freeze() + Self::empty_aligned(Alignment::of::()) } /// Create a new empty `ByteBuffer` with the provided alignment. + /// + /// This does not allocate: empty buffers are backed by a shared static allocation that is + /// aligned to [`Alignment::MAX`]. pub fn empty_aligned(alignment: Alignment) -> Self { - BufferMut::empty_aligned(alignment).freeze() + if !alignment.is_aligned_to(Alignment::of::()) { + vortex_panic!( + "Alignment {} must align to the scalar type's alignment {}", + alignment, + Alignment::of::(), + ); + } + Self { + bytes: Bytes::from_static(&EMPTY_BACKING.0), + length: 0, + alignment, + _marker: PhantomData, + } } /// Create a new full `ByteBuffer` with the given value. @@ -152,7 +174,7 @@ impl Buffer { Alignment::of::(), ); } - if bytes.as_ptr().align_offset(*alignment) != 0 { + if !alignment.is_ptr_aligned(bytes.as_ptr()) { vortex_panic!( "Bytes alignment must align to the requested alignment {}", alignment, @@ -320,7 +342,7 @@ impl Buffer { let begin_byte = begin * size_of::(); let end_byte = end * size_of::(); - if !begin_byte.is_multiple_of(*alignment) { + if !alignment.is_offset_aligned(begin_byte) { vortex_panic!( "range start must be aligned to {alignment:?}, byte {}", begin_byte @@ -369,7 +391,7 @@ impl Buffer { vortex_panic!("slice_ref subset alignment must at least align to the buffer alignment") } - if subset.as_ptr().align_offset(*alignment) != 0 { + if !alignment.is_ptr_aligned(subset.as_ptr()) { vortex_panic!("slice_ref subset must be aligned to {:?}", alignment); } @@ -435,17 +457,17 @@ impl Buffer { /// Convert self into `BufferMut`, cloning the data if there are multiple strong references. pub fn into_mut(self) -> BufferMut { self.try_into_mut() - .unwrap_or_else(|buffer| BufferMut::::copy_from(&buffer)) + .unwrap_or_else(|buffer| BufferMut::::copy_from_aligned(&buffer, buffer.alignment)) } /// Returns whether a `Buffer` is aligned to the given alignment. pub fn is_aligned(&self, alignment: Alignment) -> bool { - self.bytes.as_ptr().align_offset(*alignment) == 0 + alignment.is_ptr_aligned(self.bytes.as_ptr()) } /// Return a `Buffer` with the given alignment. Where possible, this will be zero-copy. pub fn aligned(mut self, alignment: Alignment) -> Self { - if self.as_ptr().align_offset(*alignment) == 0 { + if alignment.is_ptr_aligned(self.as_ptr()) { self.alignment = alignment; self } else { @@ -462,7 +484,7 @@ impl Buffer { /// Return a `Buffer` with the given alignment. Panics if the buffer is not aligned. pub fn ensure_aligned(mut self, alignment: Alignment) -> Self { - if self.as_ptr().align_offset(*alignment) == 0 { + if alignment.is_ptr_aligned(self.as_ptr()) { self.alignment = alignment; self } else { @@ -634,7 +656,7 @@ impl Buf for ByteBuffer { #[inline] fn advance(&mut self, cnt: usize) { - if !cnt.is_multiple_of(*self.alignment) { + if !self.alignment.is_offset_aligned(cnt) { vortex_panic!( "Cannot advance buffer by {} items, resulting alignment is not {}", cnt, @@ -786,6 +808,31 @@ mod test { assert_eq!(vec, buff.as_ref()); } + #[test] + fn empty_aligned_max_alignment() { + // Empty buffers are backed by a static and must satisfy any valid alignment. + let buf = Buffer::::empty_aligned(Alignment::MAX); + assert!(buf.is_empty()); + assert!(buf.is_aligned(Alignment::MAX)); + } + + #[test] + fn empty_slice_preserves_alignment() { + let buf = Buffer::::zeroed_aligned(8, Alignment::new(64)); + let sliced = buf.slice(0..0); + assert!(sliced.is_empty()); + assert_eq!(sliced.alignment(), Alignment::new(64)); + assert!(sliced.is_aligned(Alignment::new(64))); + } + + #[test] + fn empty_into_mut_preserves_alignment() { + let buf = Buffer::::empty_aligned(Alignment::new(64)); + let buf_mut = buf.into_mut(); + assert_eq!(buf_mut.alignment(), Alignment::new(64)); + assert!(buf_mut.is_empty()); + } + #[test] fn test_slice_unaligned_end_pos() { let data = vec![0u8; 2]; diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index ceb8732d701..6b3885e18af 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -358,7 +358,7 @@ impl BufferMut { } let bytes_at = at * size_of::(); - if !bytes_at.is_multiple_of(*self.alignment) { + if !self.alignment.is_offset_aligned(bytes_at) { vortex_panic!( "Cannot split buffer at {}, resulting alignment is not {}", at, @@ -742,7 +742,7 @@ impl Buf for ByteBufferMut { } fn advance(&mut self, cnt: usize) { - if !cnt.is_multiple_of(*self.alignment) { + if !self.alignment.is_offset_aligned(cnt) { vortex_panic!( "Cannot advance buffer by {} items, resulting alignment is not {}", cnt, @@ -765,7 +765,7 @@ unsafe impl BufMut for ByteBufferMut { #[inline] unsafe fn advance_mut(&mut self, cnt: usize) { - if !cnt.is_multiple_of(*self.alignment) { + if !self.alignment.is_offset_aligned(cnt) { vortex_panic!( "Cannot advance buffer by {} items, resulting alignment is not {}", cnt, From a6f4eda0100b1cd2092044bf3b5c01c3370478dd Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 20:43:23 +0100 Subject: [PATCH 2/8] bit-wise ops Signed-off-by: Adam Gutglick --- Cargo.lock | 1 - vortex-buffer/Cargo.toml | 1 - vortex-buffer/src/bit/buf.rs | 23 +++++++++-- vortex-buffer/src/bit/buf_mut.rs | 65 +++++++++++++++++++++++++------- vortex-buffer/src/string.rs | 2 +- 5 files changed, 72 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fd66ecc5670..333f7bff74f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9450,7 +9450,6 @@ name = "vortex-buffer" version = "0.1.0" dependencies = [ "arrow-buffer", - "bitvec", "bytes", "codspeed-divan-compat", "itertools 0.14.0", diff --git a/vortex-buffer/Cargo.toml b/vortex-buffer/Cargo.toml index ae9d7e6cc05..8f797e0c8bc 100644 --- a/vortex-buffer/Cargo.toml +++ b/vortex-buffer/Cargo.toml @@ -24,7 +24,6 @@ warn-copy = ["dep:tracing"] [dependencies] arrow-buffer = { workspace = true } -bitvec = { workspace = true } bytes = { workspace = true } itertools = { workspace = true } memmap2 = { workspace = true, optional = true } diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 457827f4ab9..61b69b3f372 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -228,10 +228,11 @@ impl BitBuffer { unsafe { buffer.push_unchecked(packed) } } - buffer.truncate(len.div_ceil(8)); + let mut bytes = buffer.into_byte_buffer(); + bytes.truncate(len.div_ceil(8)); Self { - buffer: buffer.freeze().into_byte_buffer(), + buffer: bytes.freeze(), offset: 0, len, } @@ -312,7 +313,23 @@ impl BitBuffer { assert!(end <= self.len); let len = end - start; - Self::new_with_offset(self.buffer.clone(), len, self.offset + start) + let offset = self.offset + start; + let byte_offset = offset / 8; + let bit_offset = offset % 8; + + // Trim whole bytes off the front directly rather than going through `new_with_offset`, + // which would slice (and re-clone) the clone we'd have to pass it. + let buffer = if byte_offset != 0 { + self.buffer.slice_unaligned(byte_offset..) + } else { + self.buffer.clone().aligned(Alignment::none()) + }; + + Self { + buffer, + offset: bit_offset, + len, + } } /// Slice any full bytes from the buffer, leaving the offset < 8. diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 02e4ce66755..6cf2075d606 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -3,7 +3,7 @@ use std::ops::Not; -use bitvec::view::BitView; +use arrow_buffer::bit_mask::set_bits; use crate::BitBuffer; use crate::BufferMut; @@ -267,7 +267,9 @@ impl BitBufferMut { /// Clears the bit buffer (but keeps any allocated memory). pub fn clear(&mut self) { - // Since there are no items we need to drop, we simply set the length to 0. + // Also clear the byte buffer (not just `len`) so the "bits beyond len are zero" + // invariant holds; `append_false` and `append_buffer` rely on it. + self.buffer.clear(); self.len = 0; self.offset = 0; } @@ -518,17 +520,16 @@ impl BitBufferMut { self.buffer.as_mut_slice()[dst_byte + full_bytes] |= src_bytes[full_bytes] & mask; } } else { - // Use bitvec for unaligned bit copying. - let self_slice = self - .buffer - .as_mut_slice() - .view_bits_mut::(); - let other_slice = buffer - .inner() - .as_slice() - .view_bits::(); - let source_range = src_bit_offset..src_bit_offset + bit_len; - self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]); + // Word-wise bit copy that handles mismatched source/destination bit offsets. + // `set_bits` ORs into the destination, which is safe because bits beyond `len` + // are guaranteed to be zero. + set_bits( + self.buffer.as_mut_slice(), + buffer.inner().as_slice(), + start_bit_pos, + src_bit_offset, + bit_len, + ); } self.len += bit_len; @@ -923,7 +924,43 @@ mod tests { assert!(frozen.value(7)); } - #[cfg_attr(miri, ignore)] // bitvec crate uses a ptr cast that Miri doesn't support + #[test] + fn test_append_buffer_after_truncate() { + // Truncating leaves stale set bits in the last partial byte; an append after that + // must overwrite them rather than OR into them. + let mut buf = BitBufferMut::new_set(16); + buf.truncate(3); + buf.append_buffer(&crate::BitBuffer::new_unset(8)); + + let frozen = buf.freeze(); + assert_eq!(frozen.len(), 11); + for i in 0..3 { + assert!(frozen.value(i), "bit {i} should be set"); + } + for i in 3..11 { + assert!(!frozen.value(i), "bit {i} should be unset"); + } + } + + #[test] + fn test_append_buffer_misaligned_long() { + // Force mismatched source/destination bit offsets across many words. + let source = crate::BitBuffer::from_iter((0..301).map(|i| i % 3 == 0)); + let source = source.slice(5..301); + + let mut dest = BitBufferMut::with_capacity(512); + dest.append_n(true, 3); + dest.append_buffer(&source); + + assert_eq!(dest.len(), 3 + source.len()); + for i in 0..3 { + assert!(dest.value(i), "prefix bit {i}"); + } + for i in 0..source.len() { + assert_eq!(dest.value(3 + i), source.value(i), "bit {i}"); + } + } + #[test] fn test_append_buffer_with_offsets() { // Create source buffer with offset diff --git a/vortex-buffer/src/string.rs b/vortex-buffer/src/string.rs index bcaf8d7c3d2..ca5ae5251d5 100644 --- a/vortex-buffer/src/string.rs +++ b/vortex-buffer/src/string.rs @@ -25,7 +25,7 @@ impl BufferString { /// Creates an empty `BufferString`. pub fn empty() -> Self { - Self(ByteBuffer::from(vec![])) + Self(ByteBuffer::empty()) } /// Return a view of the contents of BufferString as an immutable `&str`. From da4b1c4f8954e9e1abbdb776e423fb91e647dede Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 10 Jun 2026 17:16:53 +0100 Subject: [PATCH 3/8] some fixes Signed-off-by: Adam Gutglick --- vortex-buffer/src/bit/buf_mut.rs | 37 +++++++++++++++++++++++++------- vortex-buffer/src/buffer.rs | 21 ++++++++++-------- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 6cf2075d606..e5b94b70660 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -650,6 +650,8 @@ impl FromIterator for BitBufferMut { #[cfg(test)] mod tests { + use rstest::rstest; + use crate::BufferMut; use crate::bit::buf_mut::BitBufferMut; use crate::bitbuffer; @@ -924,6 +926,20 @@ mod tests { assert!(frozen.value(7)); } + #[test] + fn append_after_clear_reads_back_false() { + // `clear` must not leave stale set bits behind: `append_false` and `append_buffer` + // rely on bits beyond `len` being zero. + let mut bools = BitBufferMut::new_set(16); + bools.clear(); + bools.append_false(); + bools.append_buffer(&crate::BitBuffer::new_unset(8)); + + let bools = bools.freeze(); + assert_eq!(bools.len(), 9); + assert_eq!(bools.true_count(), 0); + } + #[test] fn test_append_buffer_after_truncate() { // Truncating leaves stale set bits in the last partial byte; an append after that @@ -942,22 +958,27 @@ mod tests { } } - #[test] - fn test_append_buffer_misaligned_long() { - // Force mismatched source/destination bit offsets across many words. + #[rstest] + #[case::both_aligned(0, 0)] + #[case::dst_unaligned(3, 0)] + #[case::src_unaligned(0, 5)] + #[case::mismatched(3, 5)] + #[case::equal_nonzero(5, 5)] + fn test_append_buffer_long(#[case] dst_prefix: usize, #[case] src_start: usize) { + // Exercise every alignment combination across many words. let source = crate::BitBuffer::from_iter((0..301).map(|i| i % 3 == 0)); - let source = source.slice(5..301); + let source = source.slice(src_start..301); let mut dest = BitBufferMut::with_capacity(512); - dest.append_n(true, 3); + dest.append_n(true, dst_prefix); dest.append_buffer(&source); - assert_eq!(dest.len(), 3 + source.len()); - for i in 0..3 { + assert_eq!(dest.len(), dst_prefix + source.len()); + for i in 0..dst_prefix { assert!(dest.value(i), "prefix bit {i}"); } for i in 0..source.len() { - assert_eq!(dest.value(3 + i), source.value(i), "bit {i}"); + assert_eq!(dest.value(dst_prefix + i), source.value(i), "bit {i}"); } } diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index c76c4112805..e439f8841e6 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -32,17 +32,20 @@ pub struct Buffer { pub(crate) _marker: PhantomData, } -/// Zero-length backing memory for empty buffers, aligned to [`Alignment::MAX`] so it satisfies -/// any valid alignment without allocating. -#[repr(align(32768))] -struct EmptyBacking([u8; 0]); - -static EMPTY_BACKING: EmptyBacking = EmptyBacking([]); +/// Zero-length backing for empty buffers, "aligned" to [`Alignment::MAX`] so it satisfies any +/// valid alignment without allocating. A zero-length slice never reads memory, so it may use a +/// dangling pointer as long as it is non-null and aligned. +const EMPTY_BACKING: &[u8] = { + let addr = 1usize << 15; + assert!(Alignment::MAX.is_offset_aligned(addr)); + // SAFETY: the pointer is non-null and aligned, and the slice is zero-length. + unsafe { std::slice::from_raw_parts(std::ptr::without_provenance(addr), 0) } +}; impl Default for Buffer { fn default() -> Self { Self { - bytes: Bytes::from_static(&EMPTY_BACKING.0), + bytes: Bytes::from_static(EMPTY_BACKING), length: 0, alignment: Alignment::of::(), _marker: PhantomData, @@ -113,7 +116,7 @@ impl Buffer { /// Create a new empty `ByteBuffer` with the provided alignment. /// - /// This does not allocate: empty buffers are backed by a shared static allocation that is + /// This does not allocate: empty buffers are backed by a zero-length `Bytes` that is /// aligned to [`Alignment::MAX`]. pub fn empty_aligned(alignment: Alignment) -> Self { if !alignment.is_aligned_to(Alignment::of::()) { @@ -124,7 +127,7 @@ impl Buffer { ); } Self { - bytes: Bytes::from_static(&EMPTY_BACKING.0), + bytes: Bytes::from_static(EMPTY_BACKING), length: 0, alignment, _marker: PhantomData, From 063d7a3832ef854d6f346fe28b0dd39d1132d626 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 11 Jun 2026 11:49:05 +0100 Subject: [PATCH 4/8] Try fake ptr outsie of window's null ptr safe region Signed-off-by: Adam Gutglick --- vortex-buffer/src/buffer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index e439f8841e6..377d56003d5 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -36,7 +36,7 @@ pub struct Buffer { /// valid alignment without allocating. A zero-length slice never reads memory, so it may use a /// dangling pointer as long as it is non-null and aligned. const EMPTY_BACKING: &[u8] = { - let addr = 1usize << 15; + let addr = 1usize << 20; assert!(Alignment::MAX.is_offset_aligned(addr)); // SAFETY: the pointer is non-null and aligned, and the slice is zero-length. unsafe { std::slice::from_raw_parts(std::ptr::without_provenance(addr), 0) } From ef2eebb07051ef1a484bc24c30caf100d0b86921 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 11 Jun 2026 11:51:02 +0100 Subject: [PATCH 5/8] Test empty buffer eq Signed-off-by: Adam Gutglick --- vortex-buffer/src/buffer.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 377d56003d5..08640526c22 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -847,4 +847,12 @@ mod test { // to be aligned. aligned_buffer.slice(0..1); } + + #[test] + fn test_empty_equality() { + let a = Buffer::::empty(); + let b = Buffer::::empty(); + + assert_eq!(a, b); + } } From a7bdc0060d9427d0d64d72c80342a214329d7e53 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 11 Jun 2026 13:28:23 +0100 Subject: [PATCH 6/8] debug windows Signed-off-by: Adam Gutglick --- .github/workflows/ci.yml | 9 +- encodings/parquet-variant/src/vtable.rs | 127 +++++++++++++++++++++--- 2 files changed, 119 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf9c0a785b6..fe6ead1a852 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -329,15 +329,20 @@ jobs: - name: Rust Tests (Windows) if: matrix.os == 'windows-x64' run: | + # TEMP: full Windows workspace serial run while debugging PR #8322 hangs. + # Targeted packages and both package-split runs pass quickly, so test whether the + # original full workspace only hangs under nextest parallelism. cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast ` - --exclude vortex-bench ` + --test-threads 1 ` + --exclude vortex-bench --exclude vortex-bench-server ` --exclude vortex-python --exclude vortex-duckdb ` --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-cuda-ffi ` --exclude vortex-nvcomp --exclude vortex-cub --exclude vortex-test-e2e-cuda ` --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` --exclude compress-bench --exclude xtask --exclude vortex-datafusion ` - --exclude gpu-scan-cli --exclude vortex-sqllogictest + --exclude gpu-scan-cli --exclude vortex-sqllogictest ` + --status-level all --final-status-level all - name: Rust Tests (Other) if: matrix.os != 'windows-x64' run: | diff --git a/encodings/parquet-variant/src/vtable.rs b/encodings/parquet-variant/src/vtable.rs index f2ad4f64743..9f4a0e53bb1 100644 --- a/encodings/parquet-variant/src/vtable.rs +++ b/encodings/parquet-variant/src/vtable.rs @@ -344,6 +344,16 @@ mod tests { use crate::ParquetVariant; use crate::array::ParquetVariantArrayExt; + fn debug_step(test: &str, step: &str) { + use std::io::Write as _; + + println!( + "[parquet_variant::{test}] {step} thread={:?}", + std::thread::current().id() + ); + drop(std::io::stdout().flush()); + } + fn roundtrip(array: ArrayRef) -> VortexResult { let dtype = array.dtype().clone(); let len = array.len(); @@ -366,6 +376,7 @@ mod tests { #[fixture] fn typed_value_variant_array() -> VortexResult { + debug_step("typed_value_variant_array", "building fixture"); let mut metadata = BinaryViewBuilder::new(); for _ in 0..3 { metadata.append_value(b"\x01\x00"); @@ -382,27 +393,35 @@ mod tests { None, )?; - ParquetVariant::from_arrow_variant(&ArrowVariantArray::try_new(&arrow_storage)?) + let array = + ParquetVariant::from_arrow_variant(&ArrowVariantArray::try_new(&arrow_storage)?); + debug_step("typed_value_variant_array", "built fixture"); + array } #[fixture] fn parquet_variant_file_session() -> VortexSession { + debug_step("parquet_variant_file_session", "building session"); let session = VortexSession::empty() .with::() .with::() .with::(); vortex_file::register_default_encodings(&session); session.arrays().register(ParquetVariant); + debug_step("parquet_variant_file_session", "built session"); session } #[fixture] fn write_strategy() -> Arc { + debug_step("write_strategy", "building zoned write strategy"); let mut allowed = vortex_file::ALLOWED_ENCODINGS.clone(); allowed.insert(ParquetVariant.id()); - vortex_file::WriteStrategyBuilder::default() + let strategy = vortex_file::WriteStrategyBuilder::default() .with_allow_encodings(allowed) - .build() + .build(); + debug_step("write_strategy", "built zoned write strategy"); + strategy } #[test] @@ -451,24 +470,63 @@ mod tests { #[from(typed_value_variant_array)] expected: VortexResult, parquet_variant_file_session: VortexSession, ) -> VortexResult<()> { + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "start", + ); let expected = expected?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "resolved expected array", + ); let mut bytes = ByteBufferMut::empty(); + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "starting flat write", + ); parquet_variant_file_session .write_options() .with_strategy(Arc::new(FlatLayoutStrategy::default())) .write(&mut bytes, expected.to_array_stream()) .await?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + &format!("finished flat write bytes_len={}", bytes.len()), + ); - let actual = parquet_variant_file_session + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "opening buffer", + ); + let opened = parquet_variant_file_session .open_options() - .open_buffer(bytes)? - .scan()? - .into_array_stream()? - .read_all() - .await?; + .open_buffer(bytes)?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "opened buffer", + ); + let scan = opened.scan()?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "created scan", + ); + let stream = scan.into_array_stream()?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "created array stream", + ); + let actual = stream.read_all().await?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "read all arrays", + ); assert_arrays_eq!(expected, actual); + debug_step( + "test_file_roundtrip_typed_value_variant_with_statistics", + "done", + ); Ok(()) } @@ -479,24 +537,63 @@ mod tests { parquet_variant_file_session: VortexSession, write_strategy: Arc, ) -> VortexResult<()> { + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "start", + ); let expected = expected?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "resolved expected array", + ); let mut bytes = ByteBufferMut::empty(); + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "starting zoned write", + ); parquet_variant_file_session .write_options() .with_strategy(write_strategy) .write(&mut bytes, expected.to_array_stream()) .await?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + &format!("finished zoned write bytes_len={}", bytes.len()), + ); - let actual = parquet_variant_file_session + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "opening buffer", + ); + let opened = parquet_variant_file_session .open_options() - .open_buffer(bytes)? - .scan()? - .into_array_stream()? - .read_all() - .await?; + .open_buffer(bytes)?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "opened buffer", + ); + let scan = opened.scan()?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "created scan", + ); + let stream = scan.into_array_stream()?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "created array stream", + ); + let actual = stream.read_all().await?; + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "read all arrays", + ); assert_arrays_eq!(expected, actual); + debug_step( + "test_file_roundtrip_typed_value_variant_with_zoned_strategy", + "done", + ); Ok(()) } From ad6949ba651360904b43267892b61c2c8ad6c55c Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Fri, 12 Jun 2026 13:58:08 +0100 Subject: [PATCH 7/8] restore ci Signed-off-by: Adam Gutglick --- .github/workflows/ci.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe6ead1a852..bf9c0a785b6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -329,20 +329,15 @@ jobs: - name: Rust Tests (Windows) if: matrix.os == 'windows-x64' run: | - # TEMP: full Windows workspace serial run while debugging PR #8322 hangs. - # Targeted packages and both package-split runs pass quickly, so test whether the - # original full workspace only hangs under nextest parallelism. cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast ` - --test-threads 1 ` - --exclude vortex-bench --exclude vortex-bench-server ` + --exclude vortex-bench ` --exclude vortex-python --exclude vortex-duckdb ` --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-cuda-ffi ` --exclude vortex-nvcomp --exclude vortex-cub --exclude vortex-test-e2e-cuda ` --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` --exclude compress-bench --exclude xtask --exclude vortex-datafusion ` - --exclude gpu-scan-cli --exclude vortex-sqllogictest ` - --status-level all --final-status-level all + --exclude gpu-scan-cli --exclude vortex-sqllogictest - name: Rust Tests (Other) if: matrix.os != 'windows-x64' run: | From 11bb86c75bc0343a2f16e8c2123d2ac6a2a7cbbd Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Fri, 12 Jun 2026 14:57:03 +0100 Subject: [PATCH 8/8] clean up Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/vtable.rs | 127 +++--------------------- 1 file changed, 15 insertions(+), 112 deletions(-) diff --git a/encodings/parquet-variant/src/vtable.rs b/encodings/parquet-variant/src/vtable.rs index 9f4a0e53bb1..f2ad4f64743 100644 --- a/encodings/parquet-variant/src/vtable.rs +++ b/encodings/parquet-variant/src/vtable.rs @@ -344,16 +344,6 @@ mod tests { use crate::ParquetVariant; use crate::array::ParquetVariantArrayExt; - fn debug_step(test: &str, step: &str) { - use std::io::Write as _; - - println!( - "[parquet_variant::{test}] {step} thread={:?}", - std::thread::current().id() - ); - drop(std::io::stdout().flush()); - } - fn roundtrip(array: ArrayRef) -> VortexResult { let dtype = array.dtype().clone(); let len = array.len(); @@ -376,7 +366,6 @@ mod tests { #[fixture] fn typed_value_variant_array() -> VortexResult { - debug_step("typed_value_variant_array", "building fixture"); let mut metadata = BinaryViewBuilder::new(); for _ in 0..3 { metadata.append_value(b"\x01\x00"); @@ -393,35 +382,27 @@ mod tests { None, )?; - let array = - ParquetVariant::from_arrow_variant(&ArrowVariantArray::try_new(&arrow_storage)?); - debug_step("typed_value_variant_array", "built fixture"); - array + ParquetVariant::from_arrow_variant(&ArrowVariantArray::try_new(&arrow_storage)?) } #[fixture] fn parquet_variant_file_session() -> VortexSession { - debug_step("parquet_variant_file_session", "building session"); let session = VortexSession::empty() .with::() .with::() .with::(); vortex_file::register_default_encodings(&session); session.arrays().register(ParquetVariant); - debug_step("parquet_variant_file_session", "built session"); session } #[fixture] fn write_strategy() -> Arc { - debug_step("write_strategy", "building zoned write strategy"); let mut allowed = vortex_file::ALLOWED_ENCODINGS.clone(); allowed.insert(ParquetVariant.id()); - let strategy = vortex_file::WriteStrategyBuilder::default() + vortex_file::WriteStrategyBuilder::default() .with_allow_encodings(allowed) - .build(); - debug_step("write_strategy", "built zoned write strategy"); - strategy + .build() } #[test] @@ -470,63 +451,24 @@ mod tests { #[from(typed_value_variant_array)] expected: VortexResult, parquet_variant_file_session: VortexSession, ) -> VortexResult<()> { - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "start", - ); let expected = expected?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "resolved expected array", - ); let mut bytes = ByteBufferMut::empty(); - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "starting flat write", - ); parquet_variant_file_session .write_options() .with_strategy(Arc::new(FlatLayoutStrategy::default())) .write(&mut bytes, expected.to_array_stream()) .await?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - &format!("finished flat write bytes_len={}", bytes.len()), - ); - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "opening buffer", - ); - let opened = parquet_variant_file_session + let actual = parquet_variant_file_session .open_options() - .open_buffer(bytes)?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "opened buffer", - ); - let scan = opened.scan()?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "created scan", - ); - let stream = scan.into_array_stream()?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "created array stream", - ); - let actual = stream.read_all().await?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "read all arrays", - ); + .open_buffer(bytes)? + .scan()? + .into_array_stream()? + .read_all() + .await?; assert_arrays_eq!(expected, actual); - debug_step( - "test_file_roundtrip_typed_value_variant_with_statistics", - "done", - ); Ok(()) } @@ -537,63 +479,24 @@ mod tests { parquet_variant_file_session: VortexSession, write_strategy: Arc, ) -> VortexResult<()> { - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "start", - ); let expected = expected?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "resolved expected array", - ); let mut bytes = ByteBufferMut::empty(); - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "starting zoned write", - ); parquet_variant_file_session .write_options() .with_strategy(write_strategy) .write(&mut bytes, expected.to_array_stream()) .await?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - &format!("finished zoned write bytes_len={}", bytes.len()), - ); - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "opening buffer", - ); - let opened = parquet_variant_file_session + let actual = parquet_variant_file_session .open_options() - .open_buffer(bytes)?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "opened buffer", - ); - let scan = opened.scan()?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "created scan", - ); - let stream = scan.into_array_stream()?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "created array stream", - ); - let actual = stream.read_all().await?; - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "read all arrays", - ); + .open_buffer(bytes)? + .scan()? + .into_array_stream()? + .read_all() + .await?; assert_arrays_eq!(expected, actual); - debug_step( - "test_file_roundtrip_typed_value_variant_with_zoned_strategy", - "done", - ); Ok(()) }