quantbai · quantbai · Mar 24, 2026 · Mar 24, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -100,7 +100,9 @@ Full numerical conventions and per-operator specifications are in
 - All divisions MUST have explicit zero guards:
   `pl.when(denom.abs() < 1e-10).then(None).otherwise(num / denom)`
 - NEVER rely on the Factor constructor's implicit Inf-to-null conversion as normal logic flow
-- Statistical convention: ddof=0 (population) for std/variance, ddof=1 (sample) for corr/cov
+- Statistical convention: ddof=0 (population) for all std/variance/covariance.
+  ts_corr/ts_autocorr pass ddof=1 to Polars rolling_corr due to a Polars constraint,
+  but the correlation value itself is ddof-invariant. See OPERATORS.md for details.
 - Null semantics: null propagates naturally through Polars expressions. Boundary cases
   (zero denominator, constant window, insufficient data) must be handled explicitly
 

diff --git a/OPERATORS.md b/OPERATORS.md
@@ -53,13 +53,24 @@ not a substitute for explicit zero guards.
 
 ### Standard Deviation: ddof Convention
 
-| Context | ddof | Rationale |
-| --- | --- | --- |
-| All `std` / `variance` / `zscore` / `normalize` / `winsorize` | 0 (population) | Cross-sectional and rolling-window contexts operate on the full observed population, not a sample drawn from a larger one. |
-| `ts_corr` / `ts_covariance` / `ts_autocorr` | 1 (sample) | Maintains the identity `corr(x,y) = cov(x,y) / (std(x) * std(y))` when std uses ddof=0 internally, because Pearson correlation requires unbiased covariance estimation. |
+All variance, standard deviation, and covariance computations use **ddof=0
+(population)** throughout the library. Rolling windows and cross-sections
+operate on the full observed data, not a sample drawn from a larger population.
+
+This applies to: `ts_std_dev`, `ts_covariance`, `ts_zscore`, `ts_cv`,
+`zscore`, `normalize`, `winsorize`, `group_zscore`, `group_backfill`.
 
-This split is consistent across the entire library. Every function that computes
-variance or standard deviation documents which ddof it uses.
+`ts_corr` and `ts_autocorr` delegate to Polars `rolling_corr`, which must be
+called with `ddof=1` because of a Polars implementation constraint (passing
+`ddof=0` produces incorrect correlation values). Mathematically, ddof cancels
+in the correlation ratio `cov / (std_x * std_y)`, so the result equals the
+population (ddof=0) correlation and the following identity holds:
+
+```
+ts_covariance(x, y, w) / (ts_std_dev(x, w) * ts_std_dev(y, w)) == ts_corr(x, y, w)
+```
+
+This identity is verified against NumPy to machine precision (`diff < 1e-15`).
 
 ### Rank Conventions
 
@@ -262,9 +273,9 @@ Rolling Pearson correlation between two Factors.
 
 ### `ts_covariance(x, y, window)`
 
-Rolling sample covariance between two Factors.
+Rolling population covariance between two Factors.
 
-- ddof=1
+- ddof=0
 - Warmup: `window - 1` null values
 
 ### `ts_product(x, window)`

diff --git a/elvers/ops/timeseries.py b/elvers/ops/timeseries.py
@@ -98,13 +98,13 @@ def ts_corr(a: Factor, b: Factor, window: int) -> Factor:
 
 
 def ts_covariance(a: Factor, b: Factor, window: int) -> Factor:
-    """Rolling sample covariance between two factors over N periods (ddof=1)."""
+    """Rolling population covariance between two factors over N periods (ddof=0)."""
     merged = a.df.rename({"factor": "_a"}).join(
         b.df.select(["timestamp", "symbol", pl.col("factor").alias("_b")]),
         on=["timestamp", "symbol"], how="inner"
     ).sort(["symbol", "timestamp"])
     result = merged.with_columns(
-        pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=1)
+        pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=0)
         .over("symbol").alias("factor")
     ).select(["timestamp", "symbol", "factor"])
     return Factor(result, f"ts_covariance({a.name},{b.name},{window})")

diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
@@ -106,8 +106,8 @@ class TestTsCovariance:
     def test_population_cov(self):
         a = make_ts([2.0, 4.0, 6.0, 8.0, 10.0])
         b = make_ts([1.0, 3.0, 5.0, 7.0, 9.0])
-        # ddof=1 (sample covariance): sum((xi-mx)(yi-my))/(n-1) = 40/4 = 10.0
-        assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(10.0, rel=1e-6)
+        # ddof=0 (population covariance): sum((xi-mx)(yi-my))/n = 40/5 = 8.0
+        assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(8.0, rel=1e-6)
 
 
 class TestTsProduct: