Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ temp
# Local caches
.cache
artifacts/pr-review/
__pycache__/
*.pyc

# Playwright artifacts
test-results
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 69 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ docs-ci:
@echo "[docs] CI build (no npm install)"
@npm run --silent docs:build
# Benchmarks and reports
.PHONY: bench-report vendor-d3 bench-serve bench-open
.PHONY: bench-report bench-vendor vendor-d3 bench-serve bench-open

vendor-d3:
bench-vendor:
@mkdir -p docs/benchmarks/vendor
@if [ ! -f docs/benchmarks/vendor/d3.v7.min.js ]; then \
echo "Downloading D3 v7 to docs/benchmarks/vendor..."; \
Expand All @@ -88,6 +88,22 @@ vendor-d3:
else \
echo "D3 already present (docs/benchmarks/vendor/d3.v7.min.js)"; \
fi
@if [ ! -f docs/benchmarks/vendor/open-props.min.css ]; then \
echo "Downloading Open Props to docs/benchmarks/vendor..."; \
curl -fsSL https://unpkg.com/open-props@1.7.16/open-props.min.css -o docs/benchmarks/vendor/open-props.min.css; \
echo "Open Props saved to docs/benchmarks/vendor/open-props.min.css"; \
else \
echo "Open Props already present (docs/benchmarks/vendor/open-props.min.css)"; \
fi
@if [ ! -f docs/benchmarks/vendor/normalize.dark.min.css ]; then \
echo "Downloading Open Props normalize.dark to docs/benchmarks/vendor..."; \
curl -fsSL https://unpkg.com/open-props@1.7.16/normalize.dark.min.css -o docs/benchmarks/vendor/normalize.dark.min.css; \
echo "Open Props normalize.dark saved to docs/benchmarks/vendor/normalize.dark.min.css"; \
else \
echo "Open Props normalize.dark already present (docs/benchmarks/vendor/normalize.dark.min.css)"; \
fi

vendor-d3: bench-vendor

bench-serve:
@echo "Serving repo at http://localhost:$(BENCH_PORT) (Ctrl+C to stop)"
Expand All @@ -96,6 +112,9 @@ bench-serve:
OPEN := $(shell if command -v open >/dev/null 2>&1; then echo open; \
elif command -v xdg-open >/dev/null 2>&1; then echo xdg-open; \
elif command -v powershell.exe >/dev/null 2>&1; then echo powershell.exe; fi)
# Absolute path and file:// URI for the baked offline report, so it can be
# opened without the local HTTP server. The policy URI appends a fragment that
# deep-links to the parallel-policy section; the backslash escapes `#` so make
# does not treat the rest of the line as a comment.
BENCH_INLINE_REPORT_PATH := $(abspath docs/benchmarks/report-inline.html)
BENCH_INLINE_REPORT_URI := file://$(BENCH_INLINE_REPORT_PATH)
BENCH_POLICY_REPORT_URI := $(BENCH_INLINE_REPORT_URI)\#parallel-policy

bench-open:
@if [ -n "$(OPEN)" ]; then \
Expand All @@ -104,7 +123,7 @@ bench-open:
echo "Open URL: http://localhost:$(BENCH_PORT)/docs/benchmarks/" ; \
fi

bench-report: vendor-d3
bench-report: bench-vendor
@echo "Running benches (warp-benches)..."
cargo bench -p warp-benches
@echo "Starting local server on :$(BENCH_PORT) and opening dashboard..."
Expand Down Expand Up @@ -147,19 +166,60 @@ bench-stop:
echo "[bench] No PID file at target/bench_http.pid"; \
fi

.PHONY: bench-bake bench-open-inline
.PHONY: bench-bake bench-open-inline bench-policy-bake bench-policy-export bench-policy-open-inline

# Bake a standalone HTML with inline data that works over file://
bench-bake: vendor-d3
# Bake an offline-friendly HTML report with inline data and local vendored assets.
bench-bake: bench-vendor
@echo "Running benches (warp-benches)..."
cargo bench -p warp-benches
@echo "Baking inline report..."
@python3 scripts/bench_bake.py --out docs/benchmarks/report-inline.html
@cargo xtask bench bake \
--out docs/benchmarks/report-inline.html \
--policy-json-out docs/benchmarks/parallel-policy-matrix.json
@cargo xtask bench check-artifacts \
--html docs/benchmarks/report-inline.html \
--json docs/benchmarks/parallel-policy-matrix.json
@echo "Opening inline report..."
@open docs/benchmarks/report-inline.html
@if [ -n "$(OPEN)" ]; then \
$(OPEN) docs/benchmarks/report-inline.html >/dev/null 2>&1 || echo "Open file: docs/benchmarks/report-inline.html" ; \
else \
echo "Open file: docs/benchmarks/report-inline.html" ; \
fi

bench-open-inline:
@open docs/benchmarks/report-inline.html
@if [ -n "$(OPEN)" ]; then \
$(OPEN) docs/benchmarks/report-inline.html >/dev/null 2>&1 || echo "Open file: docs/benchmarks/report-inline.html" ; \
else \
echo "Open file: docs/benchmarks/report-inline.html" ; \
fi

# Re-bake the policy JSON and unified inline report from the EXISTING local
# Criterion tree — does not rerun benches (use bench-policy-bake for that).
# Verifies both artifacts, then formats the HTML in place.
bench-policy-export: bench-vendor
	@echo "Exporting parallel policy matrix JSON..."
	@cargo xtask bench policy-export \
		--json-out docs/benchmarks/parallel-policy-matrix.json
	@echo "Baking unified inline report..."
	@cargo xtask bench bake --out docs/benchmarks/report-inline.html
	@cargo xtask bench check-artifacts \
		--html docs/benchmarks/report-inline.html \
		--json docs/benchmarks/parallel-policy-matrix.json
	@pnpm exec prettier --write docs/benchmarks/report-inline.html >/dev/null

# Run the parallel-policy-matrix benches, then export/bake the report via
# bench-policy-export, and open it at the #parallel-policy anchor. Uses
# $(MAKE) (not bare make) so flags/jobserver propagate; falls back to
# printing the URL when no opener command is available.
bench-policy-bake: bench-vendor
	@echo "Running parallel policy matrix benchmarks..."
	cargo bench -p warp-benches --bench parallel_baseline -- parallel_policy_matrix
	@$(MAKE) bench-policy-export
	@if [ -n "$(OPEN)" ]; then \
		$(OPEN) "$(BENCH_POLICY_REPORT_URI)" >/dev/null 2>&1 || echo "Open URL: $(BENCH_POLICY_REPORT_URI)" ; \
	else \
		echo "Open URL: $(BENCH_POLICY_REPORT_URI)" ; \
	fi

# Open the already-baked inline report at the #parallel-policy anchor without
# rerunning benches or rebaking; prints the URL when no opener is available.
bench-policy-open-inline:
	@if [ -n "$(OPEN)" ]; then \
		$(OPEN) "$(BENCH_POLICY_REPORT_URI)" >/dev/null 2>&1 || echo "Open URL: $(BENCH_POLICY_REPORT_URI)" ; \
	else \
		echo "Open URL: $(BENCH_POLICY_REPORT_URI)" ; \
	fi

# Spec-000 (WASM) helpers
.PHONY: spec-000-dev spec-000-build
Expand Down
21 changes: 20 additions & 1 deletion crates/warp-benches/benches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ results. This README summarizes how to run them and read the output.
- Throughput “elements” = rule applications (`n`). Uses `BatchSize::PerIteration`
so engine construction is excluded from timing.

- `parallel_baseline.rs`
- Compares serial execution, the current shard-parallel baseline, the Phase 6B
work-queue pipeline, worker-count scaling, and the shard-policy matrix.
- The policy matrix compares:
- dynamic shard claiming + per-worker deltas
- dynamic shard claiming + per-shard deltas
- static round-robin shard assignment + per-worker deltas
- static round-robin shard assignment + per-shard deltas
- dedicated one-worker-per-shard + one-delta-per-shard
- Each case includes canonical delta merge after parallel execution, so the
study reflects full policy cost for the synthetic independent workload.
- The policy matrix runs across loads `100`, `1000`, and `10000`, with worker
counts `1`, `4`, and `8` where the policy uses a worker pool.
- Throughput “elements” = executed items in the synthetic independent workload.

## Run

Run the full benches suite:
Expand All @@ -37,15 +52,19 @@ Run a single bench target (faster dev loop):
```sh
cargo bench -p warp-benches --bench snapshot_hash
cargo bench -p warp-benches --bench scheduler_drain
cargo bench -p warp-benches --bench parallel_baseline
```

Criterion HTML reports are written under `target/criterion/<group>/report/index.html`.

### Charts & Reports

- Live server + dashboard: `make bench-report` opens `http://localhost:8000/docs/benchmarks/`.
- Offline static report (no server): `make bench-bake` writes `docs/benchmarks/report-inline.html` with results injected.
- Offline static report (no server): `make bench-bake` writes `docs/benchmarks/report-inline.html` with results, policy payload, and provenance injected.
- Open the file directly (Finder or `open docs/benchmarks/report-inline.html`).
- The same static page also hosts the parallel shard-policy study.
- Run `make bench-policy-bake`, then open the `Parallel policy matrix` tab.
- `make bench-policy-export` rebakes from the existing local Criterion tree without rerunning benches.

## Interpreting Results

Expand Down
121 changes: 117 additions & 4 deletions crates/warp-benches/benches/parallel_baseline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@
//! - `serial_vs_parallel_N`: Compare parallel sharded execution vs serial baseline
//! - `work_queue_pipeline_N`: Full Phase 6B pipeline (build_work_units → execute_work_queue)
//! - `worker_scaling_100`: How throughput scales with worker count (1, 2, 4, 8, 16)
//! - `parallel_policy_matrix`: Compare shard assignment and delta accumulation policies across loads
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
use std::collections::BTreeMap;
use std::num::NonZeroUsize;
use std::time::Duration;
use warp_core::parallel::{build_work_units, execute_work_queue, WorkerResult};
use warp_core::{
execute_parallel, execute_serial, make_node_id, make_type_id, make_warp_id, AtomPayload,
AttachmentKey, AttachmentValue, ExecItem, GraphStore, GraphView, NodeId, NodeKey, NodeRecord,
OpOrigin, TickDelta, WarpId, WarpOp,
execute_parallel, execute_parallel_with_policy, execute_serial, make_node_id, make_type_id,
make_warp_id, AtomPayload, AttachmentKey, AttachmentValue, ExecItem, GraphStore, GraphView,
NodeId, NodeKey, NodeRecord, OpOrigin, ParallelExecutionPolicy, TickDelta, WarpId, WarpOp,
};

/// Simple executor that sets an attachment on the scope node.
Expand All @@ -46,6 +48,20 @@ fn touch_executor(view: GraphView<'_>, scope: &NodeId, delta: &mut TickDelta) {
});
}

/// Mirrors the production commit-path merge shape used without `delta_validate`.
fn merge_for_commit_path(deltas: Vec<TickDelta>) -> Vec<WarpOp> {
let mut flat: Vec<_> = deltas
.into_iter()
.flat_map(TickDelta::into_ops_unsorted)
.map(|op| (op.sort_key(), op))
.collect();

flat.sort_unstable_by(|a, b| a.0.cmp(&b.0));

flat.dedup_by(|a, b| a.0 == b.0);
flat.into_iter().map(|(_, op)| op).collect()
}

/// Create a test graph with N independent nodes.
fn make_test_store(n: usize) -> (GraphStore, Vec<NodeId>) {
let node_ty = make_type_id("bench/node");
Expand Down Expand Up @@ -297,10 +313,107 @@ fn bench_worker_scaling(c: &mut Criterion) {
group.finish();
}

// =============================================================================
// Policy matrix comparison
// =============================================================================

/// Maps a `ParallelExecutionPolicy` to the stable label used in benchmark IDs.
///
/// Panics on a policy with no entry in the table so a newly added policy
/// cannot silently run unlabeled in `parallel_policy_matrix`.
fn policy_label(policy: ParallelExecutionPolicy) -> &'static str {
    let table = [
        (ParallelExecutionPolicy::DYNAMIC_PER_WORKER, "dynamic_per_worker"),
        (ParallelExecutionPolicy::DYNAMIC_PER_SHARD, "dynamic_per_shard"),
        (ParallelExecutionPolicy::STATIC_PER_WORKER, "static_per_worker"),
        (ParallelExecutionPolicy::STATIC_PER_SHARD, "static_per_shard"),
        (ParallelExecutionPolicy::DEDICATED_PER_SHARD, "dedicated_per_shard"),
    ];
    for (candidate, label) in table {
        if policy == candidate {
            return label;
        }
    }
    panic!("unmapped ParallelExecutionPolicy in parallel_policy_matrix")
}

/// Converts a requested worker count into a `NonZeroUsize` hint.
///
/// A request of `0` is promoted to the minimum of one worker; every other
/// count passes through unchanged.
fn worker_hint(workers: usize) -> NonZeroUsize {
    // `NonZeroUsize::new` fails only for 0, so falling back to `MIN` (= 1)
    // replaces the original's redundant `.max(1)` + identity `map_or`.
    NonZeroUsize::new(workers).unwrap_or(NonZeroUsize::MIN)
}

/// Compares shard assignment and delta accumulation strategies directly.
///
/// This includes canonical delta merge after parallel execution so the
/// `PerWorker` vs `PerShard` axis reflects the full policy cost visible to the
/// engine, not just executor-stage delta production.
///
/// Benchmark IDs are `<policy_label>` (dedicated) or `<policy_label>/<W>w`
/// (pooled), grouped under `parallel_policy_matrix`; throughput is reported
/// in executed items (`n`).
fn bench_policy_matrix(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_policy_matrix");
    // Tighter warm-up/measurement than Criterion defaults keeps the
    // policies x loads x worker-counts sweep tractable.
    group
        .warm_up_time(Duration::from_secs(2))
        .measurement_time(Duration::from_secs(5))
        .sample_size(40);

    // All labeled policies; `policy_label` panics on anything not listed here.
    let policies = [
        ParallelExecutionPolicy::DYNAMIC_PER_WORKER,
        ParallelExecutionPolicy::DYNAMIC_PER_SHARD,
        ParallelExecutionPolicy::STATIC_PER_WORKER,
        ParallelExecutionPolicy::STATIC_PER_SHARD,
        ParallelExecutionPolicy::DEDICATED_PER_SHARD,
    ];

    // Synthetic independent workloads of 100 / 1k / 10k items.
    for &n in &[100usize, 1_000, 10_000] {
        group.throughput(Throughput::Elements(n as u64));
        for policy in policies {
            // The dedicated policy runs one worker per shard (per the bench
            // README), so the worker-count sweep below does not apply: bench
            // it once with a minimal worker hint and move on.
            if policy == ParallelExecutionPolicy::DEDICATED_PER_SHARD {
                group.bench_with_input(BenchmarkId::new(policy_label(policy), n), &n, |b, &n| {
                    b.iter_batched(
                        // Setup (excluded from timing): fresh store + items.
                        || {
                            let (store, nodes) = make_test_store(n);
                            let items = make_exec_items(&nodes);
                            (store, items)
                        },
                        // Timed: parallel execution plus canonical merge.
                        |(store, items)| {
                            let view = GraphView::new(&store);
                            let deltas =
                                execute_parallel_with_policy(view, &items, worker_hint(1), policy);
                            let merged = merge_for_commit_path(deltas);
                            criterion::black_box(merged)
                        },
                        BatchSize::SmallInput,
                    );
                });
                continue;
            }

            // Pool-based policies: sweep worker counts 1 / 4 / 8.
            for &workers in &[1usize, 4, 8] {
                group.bench_with_input(
                    BenchmarkId::new(format!("{}/{}w", policy_label(policy), workers), n),
                    &n,
                    |b, &n| {
                        b.iter_batched(
                            // Setup (excluded from timing): fresh store + items.
                            || {
                                let (store, nodes) = make_test_store(n);
                                let items = make_exec_items(&nodes);
                                (store, items)
                            },
                            // Timed: parallel execution plus canonical merge.
                            |(store, items)| {
                                let view = GraphView::new(&store);
                                let deltas = execute_parallel_with_policy(
                                    view,
                                    &items,
                                    worker_hint(workers),
                                    policy,
                                );
                                let merged = merge_for_commit_path(deltas);
                                criterion::black_box(merged)
                            },
                            BatchSize::SmallInput,
                        );
                    },
                );
            }
        }
    }

    group.finish();
}

criterion_group!(
benches,
bench_serial_vs_parallel,
bench_work_queue,
bench_worker_scaling
bench_worker_scaling,
bench_policy_matrix
);
criterion_main!(benches);
5 changes: 3 additions & 2 deletions crates/warp-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@ pub use ident::{
TypeId, WarpId,
};
pub use parallel::{
execute_parallel, execute_parallel_sharded, execute_serial, shard_of, ExecItem, MergeConflict,
PoisonedDelta, NUM_SHARDS,
execute_parallel, execute_parallel_sharded, execute_parallel_sharded_with_policy,
execute_parallel_with_policy, execute_serial, shard_of, DeltaAccumulationPolicy, ExecItem,
MergeConflict, ParallelExecutionPolicy, PoisonedDelta, ShardAssignmentPolicy, NUM_SHARDS,
};
/// Delta merging functions, only available with `delta_validate` feature.
///
Expand Down
Loading
Loading