Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions encodings/experimental/onpair/src/compute/byte_length.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::IntoArray;
use vortex_array::ValidityVTable;
use vortex_array::arrays::ConstantArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::dtype::DType;
use vortex_array::dtype::PType;
use vortex_array::scalar::Scalar;
use vortex_array::scalar_fn::fns::byte_length::ByteLengthKernel;
use vortex_array::validity::Validity;

use crate::OnPair;
use crate::OnPairArraySlotsExt;

impl ByteLengthKernel for OnPair {
    /// Computes per-row byte lengths for an OnPair array from its stored
    /// uncompressed lengths.
    ///
    /// The result is a `U64` primitive array whose nullability follows the
    /// input array's dtype; rows that are invalid in the input are null in
    /// the output.
    fn byte_length(
        array: vortex_array::ArrayView<'_, Self>,
        _ctx: &mut vortex_array::ExecutionCtx,
    ) -> vortex_error::VortexResult<Option<vortex_array::ArrayRef>> {
        let out_dtype = DType::Primitive(PType::U64, array.dtype().nullability());

        // The stored lengths are non-nullable and may use an integer type
        // narrower than u64, so cast them to the output dtype first.
        let byte_lengths = array.uncompressed_lengths().cast(out_dtype.clone())?;

        let result = match OnPair::validity(array)? {
            // Nothing is valid: the answer is a constant null of the output dtype.
            Validity::AllInvalid => {
                ConstantArray::new(Scalar::null(out_dtype), byte_lengths.len()).into_array()
            }
            // Mixed validity: apply the validity array to the lengths.
            Validity::Array(validity) => byte_lengths.mask(validity)?,
            // Everything is valid: the cast lengths are already the answer.
            Validity::NonNullable | Validity::AllValid => byte_lengths,
        };
        Ok(Some(result))
    }
}
143 changes: 143 additions & 0 deletions encodings/experimental/onpair/src/compute/compare.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::ArrayRef;
use vortex_array::ArrayView;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::ConstantArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::dtype::DType;
use vortex_array::scalar::Scalar;
use vortex_array::scalar_fn::fns::binary::CompareKernel;
use vortex_array::scalar_fn::fns::operators::CompareOperator;
use vortex_buffer::BitBuffer;
use vortex_error::VortexResult;

use crate::OnPair;
use crate::OnPairArraySlotsExt;

impl CompareKernel for OnPair {
/// Compares an OnPair array against a constant right-hand side, handling only
/// the case where that constant is an empty Utf8/Binary value.
///
/// Returns `Ok(None)` when `rhs` is not a constant, when the constant is
/// neither Utf8 nor Binary, or when it is not known to be empty (presumably so
/// the caller can fall back to another implementation; TODO confirm).
/// Otherwise returns a `BoolArray` derived from the stored uncompressed
/// lengths, carrying the validity of `lhs`.
fn compare(
lhs: ArrayView<'_, Self>,
rhs: &ArrayRef,
operator: CompareOperator,
ctx: &mut ExecutionCtx,
) -> VortexResult<Option<ArrayRef>> {
let Some(constant) = rhs.as_constant() else {
return Ok(None);
};
let is_empty = match constant.dtype() {
DType::Utf8(_) => constant.as_utf8().is_empty(),
DType::Binary(_) => constant.as_binary().is_empty(),
_ => return Ok(None),
};
// Only a definite `Some(true)` is handled here; `Some(false)` and `None`
// (presumably a null constant; TODO confirm) are declined.
if is_empty != Some(true) {
return Ok(None);
}

let lengths = lhs.uncompressed_lengths();
let buffer = match operator {
// every value is greater than or equal to an empty string
CompareOperator::Gte => BitBuffer::new_set(lhs.len()),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

todo once we can have a constant with validity

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn’t Mask scalar function that?

// no value is less than an empty string
CompareOperator::Lt => BitBuffer::new_unset(lhs.len()),
// Every other operator is evaluated by comparing each uncompressed length
// against a zero constant of the same dtype.
_ => lengths
.binary(
ConstantArray::new(Scalar::zero_value(lengths.dtype()), lengths.len())
.into_array(),
operator.into(),
)?
.execute(ctx)?,
};
Ok(Some(
Comment on lines +21 to +54

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets make a trait over the two string arrays we can use to impl this?

@myrrc myrrc Jun 12, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with a trait but it's a worse approach, because the only thing we can re-implement is comparison with an empty constant and then the diff is twice as large
I don't think over-generalisation is a good thing here.

// Attach the validity of `lhs`, combined with the constant's nullability,
// to the computed bits.
BoolArray::new(
buffer,
lhs.validity()?
.union_nullability(constant.dtype().nullability()),
)
.into_array(),
))
}
}

#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::BoolArray;
    use vortex_array::arrays::ConstantArray;
    use vortex_array::arrays::VarBinArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::builtins::ArrayBuiltins;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::scalar::Scalar;
    use vortex_array::scalar_fn::fns::operators::Operator;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use crate::compress::DEFAULT_DICT12_CONFIG;
    use crate::compress::onpair_compress;

    /// Session shared by all tests in this module to create execution contexts.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// Each comparison operator applied between a non-nullable OnPair-compressed
    /// string array and a constant empty string.
    #[cfg_attr(miri, ignore)]
    #[rstest]
    #[case(Operator::Eq, [true, false, true, false])]
    #[case(Operator::NotEq, [false, true, false, true])]
    #[case(Operator::Gt, [false, true, false, true])]
    #[case(Operator::Gte, [true, true, true, true])]
    #[case(Operator::Lt, [false, false, false, false])]
    #[case(Operator::Lte, [true, false, true, false])]
    fn compare_empty_string(#[case] op: Operator, #[case] expected: [bool; 4]) -> VortexResult<()> {
        let strings = VarBinArray::from_iter(
            [Some(""), Some("a"), Some(""), Some("bbb")],
            DType::Utf8(Nullability::NonNullable),
        );
        let encoded =
            onpair_compress(&strings, strings.len(), strings.dtype(), DEFAULT_DICT12_CONFIG)?
                .into_array();

        let mut ctx = SESSION.create_execution_ctx();
        let empty_rhs = ConstantArray::new("", strings.len()).into_array();
        let actual = encoded
            .binary(empty_rhs, op)?
            .execute::<BoolArray>(&mut ctx)?;

        let wanted = BoolArray::from_iter(expected);
        assert_arrays_eq!(&actual, &wanted);
        Ok(())
    }

    /// Equality against an empty-string constant and against a null constant
    /// on a nullable OnPair-compressed array.
    #[cfg_attr(miri, ignore)]
    #[test]
    fn compare_empty_string_nullable() -> VortexResult<()> {
        let strings = VarBinArray::from_iter(
            [Some(""), None, Some("x")],
            DType::Utf8(Nullability::Nullable),
        );
        let encoded =
            onpair_compress(&strings, strings.len(), strings.dtype(), DEFAULT_DICT12_CONFIG)?
                .into_array();
        let mut ctx = SESSION.create_execution_ctx();

        // Empty-string constant: the null row is expected to stay null.
        let empty_rhs = ConstantArray::new("", encoded.len()).into_array();
        let eq_empty = encoded
            .clone()
            .binary(empty_rhs, Operator::Eq)?
            .execute::<BoolArray>(&mut ctx)?;
        let wanted_eq_empty = BoolArray::from_iter([Some(true), None, Some(false)]);
        assert_arrays_eq!(&eq_empty, &wanted_eq_empty);

        // Null constant: every row of the result is expected to be null.
        let null_scalar = Scalar::null(DType::Utf8(Nullability::Nullable));
        let null_rhs = ConstantArray::new(null_scalar, encoded.len()).into_array();
        let eq_null = encoded
            .binary(null_rhs, Operator::Eq)?
            .execute::<BoolArray>(&mut ctx)?;
        let wanted_eq_null = BoolArray::from_iter([None::<bool>, None, None]);
        assert_arrays_eq!(&eq_null, &wanted_eq_null);
        Ok(())
    }
}
2 changes: 2 additions & 0 deletions encodings/experimental/onpair/src/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod byte_length;
mod cast;
mod compare;
mod filter;
mod slice;
9 changes: 7 additions & 2 deletions encodings/experimental/onpair/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@

use vortex_array::arrays::filter::FilterExecuteAdaptor;
use vortex_array::kernel::ParentKernelSet;
use vortex_array::scalar_fn::fns::binary::CompareExecuteAdaptor;
use vortex_array::scalar_fn::fns::byte_length::ByteLengthExecuteAdaptor;

use crate::OnPair;

// TODO: implement ListExecute & TakeExecute for OnPair
pub(super) const PARENT_KERNELS: ParentKernelSet<OnPair> =
ParentKernelSet::new(&[ParentKernelSet::lift(&FilterExecuteAdaptor(OnPair))]);
/// Parent kernels registered for `OnPair`: the filter, compare and
/// byte-length execute adaptors.
pub(super) const PARENT_KERNELS: ParentKernelSet<OnPair> = ParentKernelSet::new(&[
ParentKernelSet::lift(&FilterExecuteAdaptor(OnPair)),
ParentKernelSet::lift(&CompareExecuteAdaptor(OnPair)),
ParentKernelSet::lift(&ByteLengthExecuteAdaptor(OnPair)),
]);
9 changes: 9 additions & 0 deletions vortex-array/src/scalar_fn/fns/byte_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::arrays::varbinview::VarBinViewArrayExt;
use crate::dtype::DType;
use crate::dtype::Nullability;
use crate::dtype::PType;
use crate::expr::Expression;
use crate::kernel::ExecuteParentKernel;
use crate::scalar::Scalar;
use crate::scalar_fn::Arity;
Expand Down Expand Up @@ -122,6 +123,14 @@ impl ScalarFnVTable for ByteLength {
}
}

/// Returns the validity expression for a byte-length expression, which is
/// the validity of its first child (index 0).
fn validity(
    &self,
    _: &Self::Options,
    expression: &Expression,
) -> VortexResult<Option<Expression>> {
    let input_validity = expression.child(0).validity()?;
    Ok(Some(input_validity))
}

/// Reports this function as not null-sensitive, regardless of options.
fn is_null_sensitive(&self, _options: &Self::Options) -> bool {
false
}
Expand Down
Loading