diff --git a/CHANGELOG.md b/CHANGELOG.md index 228b85b..6c97dbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ Numerical changes are marked with [NUMERICAL]. ## [Unreleased] +## [0.3.0] - 2026-03-24 + +### Fixed +- [NUMERICAL] `ts_covariance`: unified to ddof=0 (population), consistent with all other + variance/std operators. Fixes the broken identity `cov/(std_x*std_y) == corr` which + previously had ~5-20% error due to mixed ddof. Cross-validated against numpy (diff < 1e-15). + +### Changed +- OPERATORS.md rewritten as pure operator reference manual (signatures, behavior, edge cases) +- Design rationale moved to CLAUDE.md Section 4.1 (developer-facing) +- Fixed incorrect signatures in docs: `trade_when`, `scale`, `bucket` +- Fixed README example code to use only columns present in sample data + ## [0.2.0] - 2026-03-23 ### Added @@ -24,7 +37,7 @@ Numerical changes are marked with [NUMERICAL]. ### Fixed - [NUMERICAL] `ts_product`: silently returned null for negative inputs; now correctly handles negative values via sign-magnitude decomposition -- [NUMERICAL] `ts_covariance`: used ddof=0 (population) inconsistent with `ts_corr` (ddof=1); aligned to ddof=1 (sample) +- [NUMERICAL] `ts_covariance`: added explicit ddof parameter (was using Polars default) - [NUMERICAL] `divide()`: no zero-denominator protection; now returns null where abs(divisor) < 1e-10 - [NUMERICAL] `inverse()`: no zero protection; now returns null where abs(x) < 1e-10 - `ts_regression` rettype=7 (MSE): implicit Inf-to-null on window=2; now has explicit guard diff --git a/CLAUDE.md b/CLAUDE.md index aeaa1af..acca1e2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,8 @@ elvers/ tests/ conftest.py Test fixtures (make_ts, make_factor) test_*.py One test file per operator module +OPERATORS.md Operator reference: numerical conventions, signatures, per-operator behavior, edge cases +CLAUDE.md Development standards and design rationale (this file) ``` --- @@ -92,13 +94,34 @@ git push -u origin fix/bug-name ### 
4.1 Numerical Correctness (Highest Priority) +Operator behavior reference: [OPERATORS.md](OPERATORS.md). +The rules below are for writing new code: + - All divisions MUST have explicit zero guards: `pl.when(denom.abs() < 1e-10).then(None).otherwise(num / denom)` - NEVER rely on the Factor constructor's implicit Inf-to-null conversion as normal logic flow -- Statistical convention: ddof=0 (population) for std/variance, ddof=1 (sample) for corr/cov. - This is consistent across the entire library. - Null semantics: null propagates naturally through Polars expressions. Boundary cases - (zero denominator, constant window, insufficient data) must be handled explicitly. + (zero denominator, constant window, insufficient data) must be handled explicitly + +#### Design Decisions (rationale for current conventions) + +- **NaN/Inf unified to null**: eliminates the NaN-infection problem (`NaN + 1 = NaN`) + that silently corrupts downstream computations. The Factor constructor converts on + creation so the entire library operates on a single missing-value type. +- **ddof=0 everywhere**: rolling windows and cross-sections operate on the full observed + population, not a sample from a larger one. ddof=0 is semantically correct and avoids + n=1 division-by-zero (ddof=1 divides by n-1=0). +- **ts_corr/ts_autocorr use ddof=1 internally**: Polars `rolling_corr(ddof=0)` has a bug + where ddof only applies to the covariance numerator, not the variance denominator, + producing values outside [-1, 1]. Reported: https://github.com/pola-rs/polars/issues/16161. + Correlation is ddof-invariant (cancels in ratio), so ddof=1 output is correct. +- **rank range (0, 1] not [0, 1]**: a rank of 0 is ambiguous (could mean "missing" or + "lowest"). Strictly positive range ensures every ranked value is distinguishable from null. +- **Zero guard threshold 1e-10**: conservative enough to catch near-zero denominators, + small enough not to interfere with legitimate small values in financial data. 
+- **ts_product sign-magnitude decomposition**: naive `exp(sum(log(x)))` fails for negative + inputs because `log(x)` is undefined for x < 0. Separating sign and magnitude handles + this correctly. ### 4.2 Operator Writing Rules @@ -178,7 +201,8 @@ negative values. Previously returned null, now returns correct product. ## 7. Code Review Rules -- All PRs require at least one reviewer approval before merge +- When the team has multiple developers, enable "Require approvals" in branch protection +- Currently (single-developer mode): CI status checks are required, review approval is optional - Reviewer must verify: 1. Tests pass and cover the change 2. Numerical correctness (manually verify at least one expected value) @@ -220,26 +244,26 @@ pytest tests/ -v ruff check elvers/ # 2. Update version number (single source of truth) -# Edit elvers/__init__.py: __version__ = "0.2.0" +# Edit elvers/__init__.py: __version__ = "X.Y.Z" # 3. Update CHANGELOG.md -# Move items from [Unreleased] to [0.2.0] - YYYY-MM-DD +# Move items from [Unreleased] to [X.Y.Z] - YYYY-MM-DD # 4. Commit the release git add elvers/__init__.py CHANGELOG.md -git commit -m "release: v0.2.0" +git commit -m "release: vX.Y.Z" git push origin dev # 5. Create PR: dev -> main on GitHub -# Title: "release: v0.2.0" +# Title: "release: vX.Y.Z" # Wait for CI to pass and review approval # Squash merge on GitHub # 6. Tag on main (after PR merged) git checkout main git pull origin main -git tag v0.2.0 -git push origin v0.2.0 +git tag vX.Y.Z +git push origin vX.Y.Z # 7. Automated (triggered by tag push): # - CI runs full test suite again @@ -251,14 +275,14 @@ git push origin v0.2.0 ### What Happens Automatically -When you push a tag like `v0.2.0`: +When you push a tag like `vX.Y.Z`: 1. `.github/workflows/publish.yml` triggers 2. Runs full test suite on Python 3.10-3.13 (safety net) 3. If tests pass: builds package, publishes to PyPI 4. 
Creates a GitHub Release page at github.com/quantbai/elvers/releases with auto-generated release notes from commit messages -5. Users can now `pip install elvers==0.2.0` +5. Users can now install that specific version with `pip install elvers==X.Y.Z` ### What You See on GitHub After Release @@ -268,29 +292,15 @@ When you push a tag like `v0.2.0`: --- -## 10. One-Time Setup (for repository admin) - -### PyPI Trusted Publisher (required for automated publishing) - -1. Go to https://pypi.org -> Your projects -> elvers -> Publishing -2. Add a new publisher: - - Owner: quantbai - - Repository: elvers - - Workflow name: publish.yml - - Environment: (leave blank) +## 10. Setup -### GitHub Branch Protection (strongly recommended) +### Infrastructure (already configured) -1. GitHub repo -> Settings -> Branches -> Add rule -2. Branch name pattern: `main` -3. Enable: - - "Require a pull request before merging" - - "Require approvals" (1 minimum) - - "Require status checks to pass before merging" - - Select required status check: "test" -4. Save changes +- PyPI Trusted Publisher: configured for quantbai/elvers -> publish.yml +- GitHub Branch Protection on main: require PR, require CI status checks +- GitHub Actions: ci.yml (push/PR) + publish.yml (tag-triggered release) -### Local Development Setup (every developer) +### Local Development Setup (every new developer) ```bash git clone https://github.com/quantbai/elvers.git @@ -302,33 +312,15 @@ pre-commit install --- -## 11. Commands Reference +## 11. 
Quick Reference ```bash -# === Setup === -pip install -e ".[dev]" # Install with dev dependencies -pre-commit install # Install git hooks - -# === Daily Development === +pip install -e ".[dev]" # Setup +pre-commit install # Git hooks pytest tests/ -v # Run all tests -pytest tests/test_timeseries.py -v # Single file -pytest tests/test_timeseries.py::TestTsProduct -v # Single class -ruff check elvers/ # Lint check -ruff check elvers/ --fix # Auto-fix lint issues +pytest tests/test_timeseries.py::TestTsProduct -v # Single test class +ruff check elvers/ --fix # Lint + auto-fix ruff format elvers/ # Format code - -# === Git === -git status # See what changed -git diff # See actual changes -git add # Stage specific files (never git add -A) -git commit -m "type(scope): msg" # Commit with convention -git push origin # Push to remote -git log --oneline -10 # Recent history - -# === Release === -python -m build # Build package locally (for testing) -git tag v0.2.0 # Create version tag -git push origin v0.2.0 # Push tag (triggers publish) ``` --- diff --git a/OPERATORS.md b/OPERATORS.md new file mode 100644 index 0000000..42d7a85 --- /dev/null +++ b/OPERATORS.md @@ -0,0 +1,380 @@ +# Elvers Operator Reference + +72 operators. All accept and return `Factor`. + +--- + +## Conventions + +| Convention | Rule | +| --- | --- | +| Missing values | NaN and Inf are converted to null on Factor creation. Single missing-value type (null) throughout. | +| Null arithmetic | `5.0 + null = null`. Use `filter=True` on add/subtract/multiply to treat null as identity (0 for +/-, 1 for *). | +| Division by zero | All divisions guarded at `abs(denom) < 1e-10`. The guarded result is null unless the operator's entry states otherwise (e.g. `ts_zscore` and `scale` return 0). | +| Std / Variance | ddof=0 (population) for all std, variance, covariance, zscore, normalize, winsorize. | +| Correlation | ddof-invariant. `ts_covariance(x,y,w) / (ts_std_dev(x,w) * ts_std_dev(y,w)) == ts_corr(x,y,w)`. | +| Rank | Range (0, 1]. Does not include zero. Ties: `average` method. Null excluded. 
| +| Rolling warmup | Rolling `ts_*` operators: first `window-1` values per symbol are null (`min_samples=window`). Operators marked "No warmup" (e.g. `ts_step`, `ts_backfill`) are exceptions. | + +--- + +## Arithmetic + +### `add(a, b, filter=False)` + +Element-wise addition. `filter=True`: null treated as 0. + +```python +add(close, volume) # null propagates +add(close, volume, filter=True) # null -> 0 +close + volume # operator overload +``` + +### `subtract(a, b, filter=False)` + +Element-wise subtraction. `filter=True`: null treated as 0. + +### `multiply(a, b, filter=False)` + +Element-wise multiplication. `filter=True`: null treated as 1. + +### `divide(a, b)` + +Element-wise division. Returns null where `abs(b) < 1e-10`. + +### `reverse(x)` + +Negation: `-x`. + +### `densify(x)` + +Remaps group labels to consecutive integers `0..(n-1)` per timestamp. Dense rank (ties get same value). + +### `bucket(x, buckets=None, range_params=None)` + +Assigns values to bucket indices based on edge boundaries. Provide either `buckets` (list of edges) or `range_params` (start, end, step). Returns integer labels. Null in, null out. + +--- + +## Time-Series + +Per-symbol operators. Rolling-window operators share `min_samples=window`, so the first `window-1` values are null; exceptions are marked "No warmup". + +### `ts_delay(x, window)` + +Value from `window` periods ago. `x[t - window]`. + +### `ts_delta(x, window)` + +`x[t] - x[t - window]`. + +### `ts_mean(x, window)` + +Simple moving average. `(1/n) * sum(x)` where `n` = non-null count in window. + +### `ts_sum(x, window)` + +Rolling sum. + +### `ts_std_dev(x, window)` + +Rolling standard deviation. ddof=0. + +### `ts_min(x, window)` + +Rolling minimum. + +### `ts_max(x, window)` + +Rolling maximum. + +### `ts_median(x, window)` + +Rolling median. + +### `ts_rank(x, window)` + +Rank of current value within its rolling window. + +- Range: (0, 1] +- Ties: `average` +- Formula: `average_rank / window` + +### `ts_skewness(x, window)` + +Rolling skewness. Population (bias=True). + +### `ts_kurtosis(x, window)` + +Rolling excess kurtosis. 
Fisher definition (normal = 0). + +### `ts_zscore(x, window)` + +`(x - rolling_mean) / rolling_std`. ddof=0. Returns 0 if `std < 1e-10`. + +### `ts_corr(x, y, window)` + +Rolling Pearson correlation. Range: [-1, 1]. + +### `ts_covariance(x, y, window)` + +Rolling population covariance. ddof=0. + +### `ts_product(x, window)` + +Rolling product. Handles negatives via sign-magnitude decomposition. Zero in window returns 0. + +- Math: `(-1)^count_neg * exp(sum(log(abs(x))))` + +### `ts_step(n)` + +Incrementing counter starting at 1. No warmup. + +### `ts_decay_linear(x, window)` + +Linearly weighted moving average. Weights: `[1, 2, ..., window]` (oldest=1, newest=window). + +- Math: `sum(x[i] * w[i]) / sum(w[i])` + +### `ts_decay_exp_window(x, window, factor)` + +Exponentially weighted moving average. `factor` in (0, 1): smaller = faster decay. + +- Math: `sum(x[t-i] * factor^i) / sum(factor^i)` for `i = 0..window-1` + +### `days_from_last_change(x)` + +Periods since last value change. Returns 0 on change. No warmup. + +### `ts_av_diff(x, window)` + +`x - ts_mean(x, window)`. + +### `ts_scale(x, window)` + +Min-max normalization: `(x - min) / (max - min)`. Range: [0, 1]. Returns 0 if range < 1e-10. + +### `ts_percentile(x, window, q)` + +Rolling percentile at quantile `q` in [0, 1]. + +### `ts_quantile(x, window, driver="gaussian")` + +`ts_rank` then inverse CDF. Drivers: `gaussian`, `uniform`, `cauchy`. Rank clamped to [0.001, 0.999]. + +### `ts_cv(x, window)` + +Coefficient of variation: `std / abs(mean)`. ddof=0. Returns null if `abs(mean) < 1e-10`. + +### `ts_autocorr(x, window, lag=1)` + +Rolling autocorrelation with specified lag. + +### `ts_count_nans(x, window)` + +Count of null values in rolling window. + +### `ts_backfill(x, window, k=1)` + +Fill nulls with k-th most recent non-null value. `k=1`: most recent. No warmup. + +### `kth_element(x, window, k, ignore="NaN")` + +k-th non-ignored value looking back. `ignore="NaN"`: skip null. 
`ignore="NaN 0"`: skip null and zero. + +### `last_diff_value(x, window)` + +Most recent value in lookback that differs from current. Null if none. + +### `inst_tvr(x, window)` + +Instrument turnover: `sum(abs(delta)) / sum(abs(x))`. Returns 0 if denom < 1e-10. + +### `ts_delta_limit(x, y, limit_volume=0.1)` + +Clips per-period change in `x` to a fraction of `y`. + +### `ts_regression(y, x, window, lag=0, rettype=0)` + +Rolling OLS regression. + +| rettype | Returns | +| --- | --- | +| 0 | Residual | +| 1 | Intercept | +| 2 | Slope (beta) | +| 3 | Fitted value | +| 4 | SSE | +| 5 | SST | +| 6 | R-squared | +| 7 | MSE | +| 8 | Std error of beta | +| 9 | Std error of alpha | + +Zero guard on `sum(x^2) < 1e-10` and `SST < 1e-10`. + +### `trade_when(trigger, alpha, exit_cond)` + +Hold `alpha` when `trigger > 0`, null when `exit_cond > 0`, forward-fill otherwise. All three parameters are Factors. + +--- + +## Cross-Sectional + +Across all symbols at each timestamp. + +### `rank(x)` + +Normalized rank in (0, 1]. Ties: `average`. Null excluded. + +Example: `(4, 3, 6, 10, 2)` -> `(0.6, 0.4, 0.8, 1.0, 0.2)` + +### `zscore(x)` + +`(x - mean) / std`. ddof=0. Returns 0 if `std < 1e-10`. + +### `mean(x)` + +Cross-sectional mean, broadcast to all symbols. + +### `median(x)` + +Cross-sectional median, broadcast to all symbols. + +### `scale(x, target=1.0, longscale=0.0, shortscale=0.0)` + +Scale so `sum(abs(x)) = target`. When `longscale`/`shortscale` are non-zero, scale long and short legs separately. Returns 0 if sum < 1e-10. + +### `normalize(x, use_std=False, limit=0.0)` + +Demean: `x - mean(x)`. `use_std=True`: divide by std (ddof=0). `limit > 0`: clip to [-limit, limit]. + +### `quantile(x, driver="gaussian", sigma=1.0)` + +Rank then inverse CDF. Drivers: `gaussian`, `uniform`, `cauchy`. Acklam approximation (error < 1.15e-9). + +### `signal(x)` + +Zero-mean, unit-absolute-sum normalization. Returns 0 if `abs_sum < 1e-10` or `count < 2`. 
+ +### `winsorize(x, std=4)` + +Clip to `[mean - std*sigma, mean + std*sigma]`, where `sigma` is the cross-sectional standard deviation (ddof=0). + +### `truncate(x, max_percent=0.01)` + +Cap each value so no position exceeds `max_percent` of total absolute sum. + +### `left_tail(x, maximum)` + +Sets values above `maximum` to null. + +### `right_tail(x, minimum)` + +Sets values below `minimum` to null. + +--- + +## Neutralization and Group + +### `vector_neut(x, y)` + +Orthogonal residual: `x - proj_y(x)`. Returns `x` if `dot(y,y) < 1e-10`. + +### `regression_neut(y, x)` + +OLS residual: `y - (alpha + beta * x)`. beta = 0 if `var(x) < 1e-10`. + +### `group_neutralize(x, group)` + +`x - group_mean(x)`. + +### `group_rank(x, group)` + +Rank within group. (0, 1], `average`, null excluded. + +### `group_zscore(x, group)` + +Z-score within group. ddof=0. Returns 0 if group std < 1e-10. + +### `group_scale(x, group)` + +Min-max within group to [0, 1]. Returns 0 if range < 1e-10. + +### `group_normalize(x, group, target=1)` + +Scale within group so `sum(abs(x)) = target`. Returns 0 if sum < 1e-10. + +### `group_mean(x, group, weight=None)` + +Group mean. With weight: weighted mean, falls back to unweighted if `sum(weight) < 1e-10`. + +### `group_median(x, group)` + +Group median, broadcast to members. + +### `group_backfill(x, group, std=4)` + +Fill nulls with winsorized group mean (ddof=0, clipped at `std` sigmas). + +--- + +## Math + +### `log(x, base=None)` + +Natural log (default) or specified base. Null for non-positive inputs. + +### `sqrt(x)` + +Square root. Null for negative inputs. For sign-preserving: `signed_power(x, 0.5)`. + +### `sign(x)` + +-1, 0, or +1. Null returns null. + +### `power(x, exp)` + +`x ^ exp`. Scalar or Factor exponent. + +### `signed_power(x, exp)` + +`sign(x) * abs(x) ^ exp`. Preserves sign. + +### `inverse(x)` + +`1 / x`. Returns null where `abs(x) < 1e-10`. + +### `s_log_1p(x)` + +`sign(x) * log(1 + abs(x))`. Compresses large values, preserves sign and order. + +### `maximum(x, y)` + +Element-wise max. 
Scalar or Factor. + +### `minimum(x, y)` + +Element-wise min. Scalar or Factor. + +### `where(cond, x, y)` + +`x` where `cond` is truthy, `y` otherwise. Null in `cond` is falsy. + +--- + +## Experimental (not exported) + +In `ops/_dev.py`. Use Python callbacks (`rolling_map`). Not in `__all__`. + +### `ts_arg_max(x, window)` [DEV] + +Periods since max in window. + +### `ts_arg_min(x, window)` [DEV] + +Periods since min in window. + +### `hump(x, hump=0.01)` [DEV] + +Limit turnover by capping per-period changes. diff --git a/README.md b/README.md index 7d4a05a..da5965f 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,33 @@ Elvers +[![PyPI](https://img.shields.io/pypi/v/elvers.svg)](https://pypi.org/project/elvers/) +[![CI](https://github.com/quantbai/elvers/actions/workflows/ci.yml/badge.svg)](https://github.com/quantbai/elvers/actions/workflows/ci.yml) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![Polars](https://img.shields.io/badge/Polars-1.37+-CD853F.svg)](https://pola.rs/) -**ELVERS** is a high-performance, strictly typed multi-factor alpha research engine powered by [Polars](https://pola.rs/). +Polars-native factor computation engine for quantitative research. All operators execute as Rust-backed Polars expressions with no Python loops in the hot path. +## Core Abstractions -## Design Philosophy +- **`Panel`** -- Balanced `(timestamp, symbol)` container with strict alignment guarantees. Prevents look-ahead bias by construction. +- **`Factor`** -- Immutable signal vector. Every operator takes `Factor` and returns `Factor` with eager evaluation. -Quantitative research requires rapid iteration over large universe panels without compromising execution speed. Legacy pandas-based pipelines are interpretable but inherently scale poorly. 
elvers addresses this through a robust two-layer abstraction: - -- **`Panel`** — A continuous, balanced panel container enforcing strict `(timestamp, symbol)` alignment. It mitigates look-ahead bias and standardizes index integrity across all transformations. -- **`Factor`** — A fully evaluated, eagerly executed vector of signal exposures. Bound directly to the global panel, factors resolve native Polars expressions instantaneously utilizing highly-parallelized core routines underneath Rust and C. - -The architecture guarantees that complex computational graphs—from primitive time-series aggregations to complex cross-sectional neutralizations—are resolved at theoretical hardware peaks with virtually zero Python interpreter overhead in the hot path. +## Numerical Conventions +| Convention | Summary | +| --- | --- | +| Null semantics | NaN/Inf unified to null on construction. Single missing-value type throughout. | +| Division by zero | All divisions guarded at `abs(denom) < 1e-10`, returning null. | +| Rank range | (0, 1] -- does not pass through zero. Ties use `average` method. | +| Std/Variance/Covariance | ddof=0 (population). Correlation is ddof-invariant. | +| Rolling warmup | All `ts_*` operators: first `window-1` values are null. 
| +Full conventions and per-operator specifications: +**[OPERATORS.md](OPERATORS.md)** ## Installation @@ -28,62 +36,56 @@ The architecture guarantees that complex computational graphs—from primitive t pip install elvers ``` +## Usage +```python +from elvers import load, ts_rank, ts_regression, zscore, signal -## Quick Start -Compose factors exactly as intuitively as they are expressed mathematically: +panel = load() # built-in sample data (crypto 1d OHLCV) +close, volume = panel["close"], panel["volume"] + +momentum = ts_rank(close, 20) +vol_adj = zscore(momentum) / zscore(ts_rank(volume, 20)) +beta_resid = ts_regression(close, volume, window=60, rettype=0) +alpha = signal(vol_adj) +``` + +Sub-daily data is supported via the `interval` parameter: ```python -from elvers import load, ts_rank, zscore +panel = load("hourly.parquet", interval="1h") +``` -# Load your own data -# panel = load("your_ohlcv.csv") +## Operators -# Load built-in sample dataset -panel = load() +72 operators. All accept and return `Factor`. -close = panel["close"] -volume = panel["volume"] +**Time-Series** -- rolling window per symbol: -# Define and execute expressions instantly -momentum = ts_rank(close, 20) -alpha = zscore(momentum) +`ts_delay` `ts_delta` `ts_mean` `ts_sum` `ts_std_dev` `ts_min` `ts_max` `ts_median` `ts_rank` `ts_skewness` `ts_kurtosis` `ts_zscore` `ts_corr` `ts_covariance` `ts_product` `ts_step` `ts_decay_linear` `ts_decay_exp_window` `days_from_last_change` `ts_av_diff` `ts_scale` `ts_percentile` `ts_quantile` `ts_cv` `ts_autocorr` `ts_count_nans` `ts_backfill` `kth_element` `last_diff_value` `inst_tvr` `ts_delta_limit` `ts_regression` `trade_when` -# Extract native Polars DataFrame -result = alpha.df -print(result) -``` +**Cross-Sectional** -- across symbols at each timestamp: -Both `Panel` and `Factor` expose a `.df` property that returns the underlying `pl.DataFrame`: - -- **`panel.df`** — Full panel frame with all OHLCV columns intact. 
-- **`factor.df`** — Flat `(T_days * N_symbols, 3)` frame aligned to the original spatial coordinates: - -```text -shape: (T_days * N_symbols, 3) -┌────────────┬────────┬───────────┐ -│ timestamp ┆ symbol ┆ factor │ -│ --- ┆ --- ┆ --- │ -│ date ┆ str ┆ f64 │ -╞════════════╪════════╪═══════════╡ -│ 2024-01-01 ┆ BTC ┆ null │ -│ ... ┆ ... ┆ ... │ -│ 2024-12-31 ┆ ETH ┆ 1.243 │ -└────────────┴────────┴───────────┘ -``` +`rank` `zscore` `mean` `median` `scale` `normalize` `quantile` `signal` `winsorize` `truncate` `left_tail` `right_tail` + +**Neutralization and Group** -- sector/industry neutralization: -Rows are ordered by `timestamp` (ascending), then `symbol` (ascending). +`vector_neut` `regression_neut` `group_neutralize` `group_rank` `group_zscore` `group_scale` `group_normalize` `group_mean` `group_median` `group_backfill` -> **Note**: Rolling window operators naturally yield `null` for the initial `window - 1` periods per symbol. The full dense panel shape is preserved throughout all operations. 
+**Math**: +`log` `sqrt` `sign` `power` `signed_power` `inverse` `s_log_1p` `maximum` `minimum` `where` -## Operator Library +**Arithmetic**: -| Category | Supported Operators | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| **Time-Series** | `ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_decay_linear`, `ts_av_diff`, `ts_scale`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill` | -| **Cross-Sectional** | `rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `signal` | -| **Neutralization** | `vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize` | -| **Math** | `log`, `ln`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`, standard operators (`+`, `-`, `*`, `/`, `**`, `abs`) | +`add` `subtract` `multiply` `divide` `reverse` `densify` `bucket` and standard operators (`+` `-` `*` `/` `**` `abs`) +## Development + +```bash +pip install -e ".[dev]" +pytest tests/ -v +ruff check elvers/ +``` +See [CLAUDE.md](CLAUDE.md) for full development standards. diff --git a/elvers/__init__.py b/elvers/__init__.py index 3c41065..b2eac87 100644 --- a/elvers/__init__.py +++ b/elvers/__init__.py @@ -1,13 +1,10 @@ """ -Elvers - Polars-native factor expression engine. - -High-performance, strictly-typed multi-factor alpha research -built on Polars for lightning-fast factor computation. +Elvers - Polars-native factor computation engine. 
Author: quantbai """ -__version__ = "0.2.0" +__version__ = "0.3.0" __author__ = "quantbai" from .core import Factor diff --git a/elvers/ops/timeseries.py b/elvers/ops/timeseries.py index 5655c8d..9822ae9 100644 --- a/elvers/ops/timeseries.py +++ b/elvers/ops/timeseries.py @@ -98,13 +98,13 @@ def ts_corr(a: Factor, b: Factor, window: int) -> Factor: def ts_covariance(a: Factor, b: Factor, window: int) -> Factor: - """Rolling sample covariance between two factors over N periods (ddof=1).""" + """Rolling population covariance between two factors over N periods (ddof=0).""" merged = a.df.rename({"factor": "_a"}).join( b.df.select(["timestamp", "symbol", pl.col("factor").alias("_b")]), on=["timestamp", "symbol"], how="inner" ).sort(["symbol", "timestamp"]) result = merged.with_columns( - pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=1) + pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=0) .over("symbol").alias("factor") ).select(["timestamp", "symbol", "factor"]) return Factor(result, f"ts_covariance({a.name},{b.name},{window})") diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index d9bdcd1..045550d 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -106,8 +106,8 @@ class TestTsCovariance: def test_population_cov(self): a = make_ts([2.0, 4.0, 6.0, 8.0, 10.0]) b = make_ts([1.0, 3.0, 5.0, 7.0, 9.0]) - # ddof=1 (sample covariance): sum((xi-mx)(yi-my))/(n-1) = 40/4 = 10.0 - assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(10.0, rel=1e-6) + # ddof=0 (population covariance): sum((xi-mx)(yi-my))/n = 40/5 = 8.0 + assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(8.0, rel=1e-6) class TestTsProduct: