Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,18 @@ parallel_proc = ["rayon"]
# in your build pipeline, as it's mostly C-code.
c_ffi_tests = ['cc']

# Adds Categorical8, Categorical16, and Categorical64.
# Swaps the default categorical type from CategoricalArray<u32> to CategoricalArray<u8>.
# When enabled, TextArray contains Categorical8 instead of Categorical32, keeping the
# match arm count at 1. Useful for SIMD-optimised workloads where categories fit
# within 256 values.
default_categorical_8 = []

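# Adds Categorical16, Categorical64, and whichever of Categorical8/Categorical32
# is not the current default. Because enabling this feature also enables
# `default_categorical_8`, the default becomes Categorical8 and Categorical32
# is the variant that gets added.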
#
# Highly recommend keeping these off unless required
# E.g., constrained or embedded environments, as they add combinatorial
# weight to the binary and enum match arms
extended_categorical = []
# Highly recommend keeping this off unless required, as it adds combinatorial
# weight to the binary and enum match arms.
extended_categorical = ["default_categorical_8"]

# Adds UInt8, UInt16, Int8, Int16 types.
#
Expand Down
6 changes: 6 additions & 0 deletions examples/ffi/apache_arrow_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mod apache_arrow_test {
};
use arrow::array::{ArrayRef, RecordBatch, make_array};
use minarrow::ffi::arrow_c_ffi::{export_to_c, import_from_c};
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
use minarrow::ffi::arrow_dtype::CategoricalIndexType;
use minarrow::ffi::schema::Schema;
use minarrow::{Array, ArrowType, Field, FieldArray, NumericArray, Table, TextArray};
Expand Down Expand Up @@ -79,6 +80,7 @@ mod apache_arrow_test {
let arr_string32 = Arc::new(minarrow::StringArray::<u32>::from_slice(&[
"abc", "def", "",
])) as Arc<minarrow::StringArray<u32>>;
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let arr_categorical32 = Arc::new(minarrow::CategoricalArray::<u32>::from_slices(
&[0, 1, 2],
&["A".to_string(), "B".to_string(), "C".to_string()],
Expand Down Expand Up @@ -122,6 +124,7 @@ mod apache_arrow_test {
let minarr_float64 = Array::NumericArray(NumericArray::Float64(arr_float64));
let minarr_bool = Array::BooleanArray(arr_bool);
let minarr_string32 = Array::TextArray(TextArray::String32(arr_string32));
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let minarr_categorical32 = Array::TextArray(TextArray::Categorical32(arr_categorical32));
#[cfg(feature = "datetime")]
let minarr_datetime32 = Array::TemporalArray(TemporalArray::Datetime32(arr_datetime32));
Expand All @@ -145,6 +148,7 @@ mod apache_arrow_test {
let field_float64 = Field::new("float64", ArrowType::Float64, false, None);
let field_bool = Field::new("bool", ArrowType::Boolean, false, None);
let field_string32 = Field::new("string32", ArrowType::String, false, None);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let field_categorical32 = Field::new(
"categorical32",
ArrowType::Dictionary(CategoricalIndexType::UInt32),
Expand Down Expand Up @@ -174,6 +178,7 @@ mod apache_arrow_test {
let fa_float64 = FieldArray::new(field_float64, minarr_float64);
let fa_bool = FieldArray::new(field_bool, minarr_bool);
let fa_string32 = FieldArray::new(field_string32, minarr_string32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let fa_categorical32 = FieldArray::new(field_categorical32, minarr_categorical32);
#[cfg(feature = "datetime")]
let fa_datetime32 = FieldArray::new(field_datetime32, minarr_datetime32);
Expand All @@ -200,6 +205,7 @@ mod apache_arrow_test {
cols.push(fa_float64);
cols.push(fa_bool);
cols.push(fa_string32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
cols.push(fa_categorical32);
#[cfg(feature = "datetime")]
{
Expand Down
6 changes: 6 additions & 0 deletions examples/ffi/polars_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ mod polars_roundtrip {
use std::sync::Arc;

use minarrow::ffi::arrow_c_ffi::{export_to_c, import_from_c};
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
use minarrow::ffi::arrow_dtype::CategoricalIndexType;
use minarrow::ffi::schema::Schema;
use minarrow::{Array, ArrowType, Field, FieldArray, NumericArray, Table, TextArray};
Expand Down Expand Up @@ -82,6 +83,7 @@ mod polars_roundtrip {
let arr_string32 = Arc::new(minarrow::StringArray::<u32>::from_slice(&[
"abc", "def", "",
])) as Arc<minarrow::StringArray<u32>>;
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let arr_categorical32 = Arc::new(minarrow::CategoricalArray::<u32>::from_slices(
&[0, 1, 2],
&["A".to_string(), "B".to_string(), "C".to_string()],
Expand Down Expand Up @@ -125,6 +127,7 @@ mod polars_roundtrip {
let minarr_float64 = Array::NumericArray(NumericArray::Float64(arr_float64));
let minarr_bool = Array::BooleanArray(arr_bool);
let minarr_string32 = Array::TextArray(TextArray::String32(arr_string32));
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let minarr_categorical32 = Array::TextArray(TextArray::Categorical32(arr_categorical32));
#[cfg(feature = "datetime")]
let minarr_datetime32 = Array::TemporalArray(TemporalArray::Datetime32(arr_datetime32));
Expand All @@ -148,6 +151,7 @@ mod polars_roundtrip {
let field_float64 = Field::new("float64", ArrowType::Float64, false, None);
let field_bool = Field::new("bool", ArrowType::Boolean, false, None);
let field_string32 = Field::new("string32", ArrowType::String, false, None);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let field_categorical32 = Field::new(
"categorical32",
ArrowType::Dictionary(CategoricalIndexType::UInt32),
Expand Down Expand Up @@ -176,6 +180,7 @@ mod polars_roundtrip {
let fa_float64 = FieldArray::new(field_float64, minarr_float64);
let fa_bool = FieldArray::new(field_bool, minarr_bool);
let fa_string32 = FieldArray::new(field_string32, minarr_string32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let fa_categorical32 = FieldArray::new(field_categorical32, minarr_categorical32);
#[cfg(feature = "datetime")]
let fa_datetime32 = FieldArray::new(field_datetime32, minarr_datetime32);
Expand All @@ -202,6 +207,7 @@ mod polars_roundtrip {
cols.push(fa_float64);
cols.push(fa_bool);
cols.push(fa_string32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
cols.push(fa_categorical32);
#[cfg(feature = "datetime")]
{
Expand Down
11 changes: 1 addition & 10 deletions examples/print/print_arrays.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

use std::sync::Arc;

use minarrow::aliases::{BoolArr, CatArr, FltArr, IntArr, StrArr};
use minarrow::aliases::{BoolArr, FltArr, IntArr, StrArr};
use minarrow::enums::array::Array;
use minarrow::{Bitmask, MaskedArray, NumericArray, Print, TextArray};

Expand All @@ -44,12 +44,6 @@ fn main() {
// String and Dictionary/Categorical
let col_str32 = StrArr::from_slice(&["red", "blue", "green", "yellow", "purple"]);

let col_cat32 = CatArr::<u32>::from_values(
["apple", "banana", "cherry", "banana", "apple"]
.iter()
.copied(),
);

// --- Print NumericArray, TextArray, TemporalArray enums
println!("\n--- Enums: NumericArray, TextArray, TemporalArray ---");
NumericArray::Int32(Arc::new(col_i32.clone())).print();
Expand All @@ -60,7 +54,6 @@ fn main() {
println!("\n");
TextArray::String32(Arc::new(col_str32.clone())).print();
println!("\n");
let _ = &TextArray::Categorical32(Arc::new(col_cat32.clone())).print();

println!("\n--- Array (top-level) ---");
Array::from_int32(col_i32.clone()).print();
Expand All @@ -71,8 +64,6 @@ fn main() {
println!("\n");
Array::from_string32(col_str32.clone()).print();
println!("\n");
Array::from_categorical32(col_cat32.clone()).print();
println!("\n");
// --- Print Array Views (ArrayV, NumericArrayV, TextArrayV, TemporalArrayV)
#[cfg(feature = "views")]
println!("\n--- Array Views ---");
Expand Down
7 changes: 6 additions & 1 deletion examples/print/print_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
//! cargo run --example print_table
//! ---------------------------------------------------------

use minarrow::aliases::{BoolArr, CatArr, FltArr, IntArr, StrArr};
use minarrow::aliases::{BoolArr, FltArr, IntArr, StrArr};
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
use minarrow::aliases::CatArr;
use minarrow::{Bitmask, FieldArray, MaskedArray, Print, Table};
#[cfg(feature = "datetime")]
use minarrow::{DatetimeArray, enums::time_units::TimeUnit};
Expand All @@ -41,6 +43,7 @@ fn main() {

// String and Dictionary/Categorical
let col_str32 = StrArr::<u32>::from_slice(&["red", "blue", "green", "yellow", "purple"]);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let col_cat32 = CatArr::<u32>::from_values(
["apple", "banana", "cherry", "banana", "apple"]
.iter()
Expand Down Expand Up @@ -74,6 +77,7 @@ fn main() {
let fa_f64 = FieldArray::from_arr("float64_col", col_f64);
let fa_bool = FieldArray::from_arr("bool_col", col_bool);
let fa_str32 = FieldArray::from_arr("utf8_col", col_str32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
let fa_cat32 = FieldArray::from_arr("dict32_col", col_cat32);
#[cfg(feature = "datetime")]
let fa_dt32 = FieldArray::from_arr("datetime32_col", col_dt32);
Expand All @@ -90,6 +94,7 @@ fn main() {
tbl.add_col(fa_f64);
tbl.add_col(fa_bool);
tbl.add_col(fa_str32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
tbl.add_col(fa_cat32);
#[cfg(feature = "datetime")]
tbl.add_col(fa_dt32);
Expand Down
42 changes: 25 additions & 17 deletions src/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,20 +527,22 @@ macro_rules! string_to_cat {
};
}

#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
string_to_cat!(u32, u8);
#[cfg(feature = "extended_categorical")]
string_to_cat!(u32, u16);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
string_to_cat!(u32, u32);
#[cfg(feature = "extended_categorical")]
string_to_cat!(u32, u64);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
#[cfg(feature = "large_string")]
string_to_cat!(u64, u8);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "large_string")]
string_to_cat!(u64, u16);
#[cfg(feature = "large_string")]
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
string_to_cat!(u64, u32);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "large_string")]
Expand Down Expand Up @@ -586,20 +588,22 @@ macro_rules! cat_to_string {
};
}

#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_string!(u8, u32);
#[cfg(feature = "extended_categorical")]
cat_to_string!(u16, u32);
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
cat_to_string!(u32, u32);
#[cfg(feature = "extended_categorical")]
cat_to_string!(u64, u32);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
#[cfg(feature = "large_string")]
cat_to_string!(u8, u64);
#[cfg(feature = "large_string")]
#[cfg(feature = "extended_categorical")]
cat_to_string!(u16, u64);
#[cfg(feature = "large_string")]
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
cat_to_string!(u32, u64);
#[cfg(feature = "large_string")]
#[cfg(feature = "extended_categorical")]
Expand Down Expand Up @@ -644,7 +648,7 @@ impl TryFrom<&StringArray<u64>> for StringArray<u32> {
}
}

#[cfg(feature = "extended_categorical")]
#[cfg(any(feature = "default_categorical_8", feature = "extended_categorical"))]
macro_rules! cat_to_cat_widen {
($src:ty, $dst:ty) => {
impl From<&CategoricalArray<$src>> for CategoricalArray<$dst> {
Expand All @@ -660,7 +664,7 @@ macro_rules! cat_to_cat_widen {
};
}

#[cfg(feature = "extended_categorical")]
#[cfg(any(feature = "default_categorical_8", feature = "extended_categorical"))]
macro_rules! cat_to_cat_narrow {
($src:ty, $dst:ty) => {
impl TryFrom<&CategoricalArray<$src>> for CategoricalArray<$dst> {
Expand All @@ -683,23 +687,23 @@ macro_rules! cat_to_cat_narrow {
};
}

#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_widen!(u8, u16);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_widen!(u8, u32);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_widen!(u8, u64);
#[cfg(feature = "extended_categorical")]
cat_to_cat_widen!(u16, u32);
#[cfg(feature = "extended_categorical")]
cat_to_cat_widen!(u16, u64);
#[cfg(feature = "extended_categorical")]
cat_to_cat_widen!(u32, u64);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_narrow!(u16, u8);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_narrow!(u32, u8);
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
cat_to_cat_narrow!(u64, u8);
#[cfg(feature = "extended_categorical")]
cat_to_cat_narrow!(u32, u16);
Expand All @@ -709,7 +713,7 @@ cat_to_cat_narrow!(u64, u16);
cat_to_cat_narrow!(u64, u32);

// identity conversions (Arc-clone) for completeness
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
impl From<&CategoricalArray<u8>> for CategoricalArray<u8> {
fn from(c: &CategoricalArray<u8>) -> Self {
c.clone()
Expand Down Expand Up @@ -952,15 +956,15 @@ impl View for Arc<StringArray<u64>> {
type BufferT = u8;
}

#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
/// Lifts a shared `CategoricalArray<u8>` into the top-level `Array` enum.
impl From<Arc<CategoricalArray<u8>>> for Array {
    /// Stores the `Arc` as-is inside `TextArray::Categorical8`, then wraps
    /// that in `Array::TextArray`.
    fn from(shared: Arc<CategoricalArray<u8>>) -> Self {
        let text = TextArray::Categorical8(shared);
        Self::TextArray(text)
    }
}

#[cfg(feature = "views")]
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
/// `View` implementation for a shared (`Arc`-wrapped) `CategoricalArray<u8>`.
impl View for Arc<CategoricalArray<u8>> {
// NOTE(review): `BufferT` presumably names the element type of the backing
// index buffer (`u8` here) — confirm against the `View` trait definition.
type BufferT = u8;
}
Expand All @@ -978,13 +982,15 @@ impl View for Arc<CategoricalArray<u16>> {
type BufferT = u16;
}

#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
/// Lifts a shared `CategoricalArray<u32>` into the top-level `Array` enum.
impl From<Arc<CategoricalArray<u32>>> for Array {
    /// Stores the `Arc` as-is inside `TextArray::Categorical32`, then wraps
    /// that in `Array::TextArray`.
    fn from(shared: Arc<CategoricalArray<u32>>) -> Self {
        let text = TextArray::Categorical32(shared);
        Self::TextArray(text)
    }
}

#[cfg(feature = "views")]
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
/// `View` implementation for a shared (`Arc`-wrapped) `CategoricalArray<u32>`.
impl View for Arc<CategoricalArray<u32>> {
// NOTE(review): `BufferT` presumably names the element type of the backing
// index buffer (`u32` here) — confirm against the `View` trait definition.
type BufferT = u32;
}
Expand Down Expand Up @@ -1191,15 +1197,15 @@ impl View for StringArray<u64> {
type BufferT = u8;
}

#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
/// Lifts an owned `CategoricalArray<u8>` into the top-level `Array` enum.
impl From<CategoricalArray<u8>> for Array {
    /// Converts the owned array with `.into()` to the payload type expected by
    /// `TextArray::Categorical8`, then wraps the result in `Array::TextArray`.
    fn from(owned: CategoricalArray<u8>) -> Self {
        let text = TextArray::Categorical8(owned.into());
        Self::TextArray(text)
    }
}

#[cfg(feature = "views")]
#[cfg(feature = "extended_categorical")]
#[cfg(feature = "default_categorical_8")]
/// `View` implementation for an owned `CategoricalArray<u8>`.
impl View for CategoricalArray<u8> {
// NOTE(review): `BufferT` presumably names the element type of the backing
// index buffer (`u8` here) — confirm against the `View` trait definition.
type BufferT = u8;
}
Expand All @@ -1217,13 +1223,15 @@ impl View for CategoricalArray<u16> {
type BufferT = u16;
}

#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
/// Lifts an owned `CategoricalArray<u32>` into the top-level `Array` enum.
impl From<CategoricalArray<u32>> for Array {
    /// Converts the owned array with `.into()` to the payload type expected by
    /// `TextArray::Categorical32`, then wraps the result in `Array::TextArray`.
    fn from(owned: CategoricalArray<u32>) -> Self {
        let text = TextArray::Categorical32(owned.into());
        Self::TextArray(text)
    }
}

#[cfg(feature = "views")]
#[cfg(any(not(feature = "default_categorical_8"), feature = "extended_categorical"))]
/// `View` implementation for an owned `CategoricalArray<u32>`.
impl View for CategoricalArray<u32> {
// NOTE(review): `BufferT` presumably names the element type of the backing
// index buffer (`u32` here) — confirm against the `View` trait definition.
type BufferT = u32;
}
Expand Down
Loading
Loading