From ba737d5fcec6208a2756783c7bae61f27403de06 Mon Sep 17 00:00:00 2001 From: PB <37089506+pbower@users.noreply.github.com> Date: Fri, 3 Apr 2026 06:47:32 +1100 Subject: [PATCH] 1. Add EQ Hash to Field relations 2. Add SharedBuffer Arc tool --- src/enums/time_units.rs | 4 ++-- src/ffi/arrow_dtype.rs | 4 ++-- src/structs/field.rs | 9 ++++++++- src/structs/shared_buffer/mod.rs | 20 ++++++++++++++++++++ 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/enums/time_units.rs b/src/enums/time_units.rs index 1fe6f89..71577ef 100644 --- a/src/enums/time_units.rs +++ b/src/enums/time_units.rs @@ -38,7 +38,7 @@ use std::fmt::{Display, Formatter, Result as FmtResult}; /// ## Behaviour /// - Unit values are stored on the `DatetimeArray`, enabling variant-specific logic. /// - When transmitted over FFI, an `Apache Arrow`- produces compatible native format. -#[derive(PartialEq, Clone, Copy, Debug, Default)] +#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Default)] pub enum TimeUnit { /// Seconds for Apache Arrow `Time32` and `Time64` units. Seconds, @@ -64,7 +64,7 @@ pub enum TimeUnit { /// T-integer represents an interval, rather than an epoch value. /// Then, it will materialise as an `Interval` *Apache Arrow* type /// when sent over FFI. -#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Eq, Hash, Clone, Debug)] pub enum IntervalUnit { YearMonth, DaysTime, diff --git a/src/ffi/arrow_dtype.rs b/src/ffi/arrow_dtype.rs index 3ccf46d..4a65ebc 100644 --- a/src/ffi/arrow_dtype.rs +++ b/src/ffi/arrow_dtype.rs @@ -77,7 +77,7 @@ use crate::{BooleanArray, CategoricalArray, Float, FloatArray, Integer, StringAr /// - For `DatetimeArray` types, `ArrowType` reflects only the physical encoding. /// Logical distinctions (e.g., interpreting a `Date64` as a timestamp vs. a duration) are stored in `Field` metadata. /// - Dictionary key widths are defined by the associated `CategoricalIndexType`. 
-#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Eq, Hash, Clone, Debug)] pub enum ArrowType { Null, Boolean, @@ -139,7 +139,7 @@ pub enum ArrowType { /// - Maps directly to the integer index type in Apache Arrow's `DictionaryType`. /// - Preserved when sending categorical arrays over the Arrow C Data Interface. -#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Eq, Hash, Clone, Debug)] pub enum CategoricalIndexType { #[cfg(feature = "default_categorical_8")] UInt8, diff --git a/src/structs/field.rs b/src/structs/field.rs index 0f64bc9..edf0201 100644 --- a/src/structs/field.rs +++ b/src/structs/field.rs @@ -27,6 +27,7 @@ use std::collections::BTreeMap; use std::fmt::{Display, Formatter}; +use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; #[cfg(feature = "datetime")] @@ -64,7 +65,7 @@ static UNNAMED_FIELD_COUNTER: AtomicUsize = AtomicUsize::new(1); /// - This ensures that when sent over Arrow C-FFI (or `to_apache_arrow()`), /// it converts to the correct external type. Whilst, avoiding proliferating many /// specialised types prematurely, keeping the API and binary size minimal. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Field { pub name: String, pub dtype: ArrowType, @@ -235,6 +236,12 @@ impl Display for Field { } } +impl From<Arc<Field>> for Field { + fn from(arc: Arc<Field>) -> Self { + Arc::try_unwrap(arc).unwrap_or_else(|a| (*a).clone()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/structs/shared_buffer/mod.rs b/src/structs/shared_buffer/mod.rs index ea67be6..42dd4cc 100644 --- a/src/structs/shared_buffer/mod.rs +++ b/src/structs/shared_buffer/mod.rs @@ -19,6 +19,8 @@ //! This is an internal module that backs the `Buffer` type supporting //! the typed Arrays in *Minarrow*. 
+use std::sync::Arc; + use crate::Vec64; use crate::structs::shared_buffer::internal::owned::{OWNED_VT, Owned}; use crate::structs::shared_buffer::internal::pvec::PromotableVec; @@ -161,6 +163,24 @@ impl SharedBuffer { Self::from_vec64(Vec64(raw_vec)) } + /// Constructs a `SharedBuffer` from an `Arc<M>` where `M: AsRef<[u8]>`. + /// + /// Handles the double deref internally so callers don't need a wrapper + /// type. Use `.slice()` for sub-region views. + pub fn from_arc<M: AsRef<[u8]> + Send + Sync + 'static>(arc: Arc<M>) -> Self { + // ArcOwner adapts Arc<M> to AsRef<[u8]> for from_owner. + // Always Sized since Arc<M> is a pointer regardless of M. + struct ArcOwner<M>(Arc<M>); + impl<M: AsRef<[u8]>> AsRef<[u8]> for ArcOwner<M> { + #[inline] + fn as_ref(&self) -> &[u8] { (*self.0).as_ref() } + } + unsafe impl<M: Send + Sync> Send for ArcOwner<M> {} + unsafe impl<M: Send + Sync> Sync for ArcOwner<M> {} + + Self::from_owner(ArcOwner(arc)) + } + /// Constructs a `SharedBuffer` from an arbitrary owner (e.g. Arc<[u8]>, mmap, etc). /// /// The owner must implement `AsRef<[u8]> + Send + Sync + 'static`.