From 143b78cd160a8e983d7a43abd7830b384aa040b8 Mon Sep 17 00:00:00 2001
From: rich7420 <rc910420@gmail.com>
Date: Thu, 12 Mar 2026 16:19:33 +0800
Subject: [PATCH 1/2] feat: add SVHN Quantum Kernel SVM benchmark

---
 .../svhn_kernel_amplitude.py                  | 254 ++++++++++++++++
 .../qdp_pipeline/svhn_kernel_amplitude.py     | 276 ++++++++++++++++++
 uv.lock                                       |   4 -
 3 files changed, 530 insertions(+), 4 deletions(-)
 create mode 100644 qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py
 create mode 100644 qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/svhn_kernel_amplitude.py

diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py
new file mode 100644
index 000000000..7b331ce89
--- /dev/null
+++ b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Quantum Kernel SVM — PennyLane baseline (CPU encoding) — SVHN dataset.
+
+Pipeline:
+  SVHN (32×32×3) → Flatten (3072) → L2-norm + zero-pad (4096, 12 qubits)
+    → Quantum Kernel K[i,j] = (encoded[i] · encoded[j])² → sklearn SVM
+
+Encoding: CPU NumPy (L2-normalise + zero-pad to 2^12 = 4096).
+Kernel:   Precomputed squared inner product of amplitude-encoded state vectors.
+Classifier: sklearn.svm.SVC(kernel='precomputed').
+
+Each pipeline step is timed separately to show the encoding fraction.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import time
+import urllib.request
+
+import numpy as np
+
+try:
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.svm import SVC
+except ImportError as e:
+    raise SystemExit(
+        "scikit-learn is required. Install with: uv sync --group benchmark"
+    ) from e
+
+try:
+    from scipy.io import loadmat
+except ImportError as e:
+    raise SystemExit("scipy is required. Install with: pip install scipy") from e
+
+
+# ---------------------------------------------------------------------------
+# SVHN data loading
+# ---------------------------------------------------------------------------
+
+SVHN_URLS = {
+    "train": "http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
+    "test": "http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
+}
+
+
+def _download_if_needed(url: str, dest: str) -> str:
+    if not os.path.exists(dest):
+        os.makedirs(os.path.dirname(dest), exist_ok=True)
+        print(f"    Downloading {url} ...")
+        urllib.request.urlretrieve(url, dest)
+        print(f"    Saved to {dest}")
+    return dest
+
+
+def load_svhn(
+    data_home: str | None = None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Load SVHN train/test: (n, 3072) float64 in [0,1], labels 0-9."""
+    if data_home is None:
+        data_home = os.path.join(os.path.expanduser("~"), "scikit_learn_data", "svhn")
+
+    train_path = _download_if_needed(
+        SVHN_URLS["train"], os.path.join(data_home, "train_32x32.mat")
+    )
+    test_path = _download_if_needed(
+        SVHN_URLS["test"], os.path.join(data_home, "test_32x32.mat")
+    )
+
+    train_mat = loadmat(train_path)
+    test_mat = loadmat(test_path)
+
+    X_train = (
+        train_mat["X"].transpose(3, 0, 1, 2).reshape(-1, 3072).astype(np.float64)
+        / 255.0
+    )
+    X_test = (
+        test_mat["X"].transpose(3, 0, 1, 2).reshape(-1, 3072).astype(np.float64) / 255.0
+    )
+    Y_train = train_mat["y"].ravel().astype(int) % 10
+    Y_test = test_mat["y"].ravel().astype(int) % 10
+
+    return X_train, X_test, Y_train, Y_test
+
+
+# ---------------------------------------------------------------------------
+# Encoding & kernel
+# ---------------------------------------------------------------------------
+
+NUM_QUBITS = 12
+STATE_DIM = 2**NUM_QUBITS  # 4096
+CLASS_POS = 1
+CLASS_NEG = 7
+
+
+def _filter_binary(X, Y):
+    mask = (Y == CLASS_POS) | (Y == CLASS_NEG)
+    return X[mask], np.where(Y[mask] == CLASS_POS, 1, -1)
+
+
+def encode_cpu(X: np.ndarray) -> np.ndarray:
+    """L2-normalise + zero-pad to 4096. Returns (n, 4096) float64."""
+    norms = np.linalg.norm(X, axis=1, keepdims=True)
+    norms[norms == 0] = 1.0
+    X_normed = X / norms
+    pad = STATE_DIM - X.shape[1]
+    if pad > 0:
+        X_normed = np.concatenate(
+            [X_normed, np.zeros((X_normed.shape[0], pad), dtype=X_normed.dtype)], axis=1
+        )
+    return X_normed
+
+
+def compute_kernel(X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
+    """Quantum kernel: K[i,j] = |⟨ψ(x_j)|ψ(x_i)⟩|² = (X1 @ X2.T)²."""
+    return (X1 @ X2.T) ** 2
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Quantum Kernel SVM — PennyLane baseline (CPU) — SVHN (12 qubits)"
+    )
+    parser.add_argument(
+        "--n-samples",
+        type=int,
+        default=5000,
+        help="Total samples for CV (default: 5000)",
+    )
+    parser.add_argument("--folds", type=int, default=5, help="CV folds (default: 5)")
+    parser.add_argument(
+        "--seed", type=int, default=42, help="Random seed (default: 42)"
+    )
+    parser.add_argument(
+        "--svm-c",
+        type=float,
+        default=100.0,
+        help="SVM regularisation C (default: 100.0)",
+    )
+    parser.add_argument("--data-home", type=str, default=None, help="Data cache dir")
+    args = parser.parse_args()
+
+    print("Quantum Kernel SVM — CPU baseline — SVHN")
+    print(
+        f"  {NUM_QUBITS} qubits, {STATE_DIM}-dim state, binary: digit {CLASS_POS} vs {CLASS_NEG}"
+    )
+    print(f"  n_samples={args.n_samples}, {args.folds}-fold CV, C={args.svm_c}")
+    print()
+
+    # Load & filter
+    print("  Loading SVHN ...")
+    X_train_all, X_test_all, Y_train_all, Y_test_all = load_svhn(
+        data_home=args.data_home
+    )
+    X_all = np.concatenate([X_train_all, X_test_all], axis=0)
+    Y_all = np.concatenate([Y_train_all, Y_test_all], axis=0)
+    X_bin, Y_bin = _filter_binary(X_all, Y_all)
+    print(f"  Binary filtered: {len(Y_bin):,} samples (pos={np.mean(Y_bin == 1):.2f})")
+
+    rng = np.random.default_rng(args.seed)
+    if args.n_samples < len(Y_bin):
+        idx = rng.choice(len(Y_bin), size=args.n_samples, replace=False)
+        X_bin, Y_bin = X_bin[idx], Y_bin[idx]
+    print(f"  Subsampled: {len(Y_bin):,} samples")
+    print()
+
+    # Step 1: StandardScaler + Encode (all data, once)
+    t0 = time.perf_counter()
+    scaler = StandardScaler().fit(X_bin)
+    X_scaled = scaler.transform(X_bin)
+    X_encoded = encode_cpu(X_scaled)
+    encode_sec = time.perf_counter() - t0
+    print(
+        f"  Step 1: Scale+Encode ........  {encode_sec:.4f}s  (n={len(Y_bin)}, dim={STATE_DIM})"
+    )
+
+    # Step 2: Full kernel matrix
+    t0 = time.perf_counter()
+    K_full = compute_kernel(X_encoded, X_encoded)
+    kernel_sec = time.perf_counter() - t0
+    print(
+        f"  Step 2: Kernel      ........  {kernel_sec:.4f}s  ({K_full.shape[0]}×{K_full.shape[1]})"
+    )
+
+    # Step 3: k-fold cross-validation
+    from sklearn.model_selection import StratifiedKFold
+
+    skf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.seed)
+
+    fold_accs = []
+    cv_fit_sec = 0.0
+    cv_pred_sec = 0.0
+
+    print(f"\n  Step 3: {args.folds}-fold Cross-Validation")
+    for fold, (train_idx, test_idx) in enumerate(skf.split(X_encoded, Y_bin), 1):
+        K_train = K_full[np.ix_(train_idx, train_idx)]
+        K_test = K_full[np.ix_(test_idx, train_idx)]
+
+        t0 = time.perf_counter()
+        svm = SVC(kernel="precomputed", C=args.svm_c)
+        svm.fit(K_train, Y_bin[train_idx])
+        cv_fit_sec += time.perf_counter() - t0
+
+        t0 = time.perf_counter()
+        acc = svm.score(K_test, Y_bin[test_idx])
+        cv_pred_sec += time.perf_counter() - t0
+
+        fold_accs.append(acc)
+        n_sv = svm.n_support_.sum()
+        print(
+            f"    Fold {fold}/{args.folds}: acc={acc:.4f}  "
+            f"(train={len(train_idx)}, test={len(test_idx)}, SVs={n_sv})"
+        )
+
+    mean_acc = np.mean(fold_accs)
+    std_acc = np.std(fold_accs)
+
+    total_sec = encode_sec + kernel_sec + cv_fit_sec + cv_pred_sec
+    encode_pct = encode_sec / total_sec * 100
+
+    print(f"\n  {'─' * 50}")
+    print(f"  Encode time:        ........  {encode_sec:.4f}s")
+    print(f"  Kernel time:        ........  {kernel_sec:.4f}s")
+    print(f"  CV fit time:        ........  {cv_fit_sec:.4f}s  ({args.folds} folds)")
+    print(f"  CV predict time:    ........  {cv_pred_sec:.4f}s")
+    print(f"  Total:              ........  {total_sec:.4f}s")
+    print(f"  Encoding fraction:  ........  {encode_pct:.1f}%")
+    print(f"  Accuracy:           ........  {mean_acc:.4f} ± {std_acc:.4f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/svhn_kernel_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/svhn_kernel_amplitude.py
new file mode 100644
index 000000000..a377e5daf
--- /dev/null
+++ b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/svhn_kernel_amplitude.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Quantum Kernel SVM — QDP pipeline (GPU encoding) — SVHN dataset.
+
+Pipeline:
+  SVHN (32×32×3) → Flatten (3072) → QdpEngine.encode(amplitude) on GPU (4096, 12 qubits)
+    → Quantum Kernel K[i,j] = (encoded[i] · encoded[j])² → sklearn SVM
+
+Encoding: QdpEngine (GPU) — data stays on CUDA for kernel matmul, then moves to CPU for SVM.
+Kernel:   Precomputed squared inner product (GPU torch.mm).
+Classifier: sklearn.svm.SVC(kernel='precomputed').
+
+Each pipeline step is timed separately to show the encoding fraction.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import time
+import urllib.request
+
+import numpy as np
+import torch
+
+try:
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.svm import SVC
+except ImportError as e:
+    raise SystemExit(
+        "scikit-learn is required. Install with: uv sync --group benchmark"
+    ) from e
+
+try:
+    from scipy.io import loadmat
+except ImportError as e:
+    raise SystemExit("scipy is required. Install with: pip install scipy") from e
+
+try:
+    from qumat_qdp import QdpEngine
+except ImportError as e:
+    raise SystemExit(
+        "qumat_qdp is required. Install with: uv sync --group benchmark"
+    ) from e
+
+
+# ---------------------------------------------------------------------------
+# SVHN data loading
+# ---------------------------------------------------------------------------
+
+SVHN_URLS = {
+    "train": "http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
+    "test": "http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
+}
+
+
+def _download_if_needed(url: str, dest: str) -> str:
+    if not os.path.exists(dest):
+        os.makedirs(os.path.dirname(dest), exist_ok=True)
+        print(f"    Downloading {url} ...")
+        urllib.request.urlretrieve(url, dest)
+        print(f"    Saved to {dest}")
+    return dest
+
+
+def load_svhn(
+    data_home: str | None = None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Load SVHN train/test: (n, 3072) float64 in [0,1], labels 0-9."""
+    if data_home is None:
+        data_home = os.path.join(os.path.expanduser("~"), "scikit_learn_data", "svhn")
+
+    train_path = _download_if_needed(
+        SVHN_URLS["train"], os.path.join(data_home, "train_32x32.mat")
+    )
+    test_path = _download_if_needed(
+        SVHN_URLS["test"], os.path.join(data_home, "test_32x32.mat")
+    )
+
+    train_mat = loadmat(train_path)
+    test_mat = loadmat(test_path)
+
+    X_train = (
+        train_mat["X"].transpose(3, 0, 1, 2).reshape(-1, 3072).astype(np.float64)
+        / 255.0
+    )
+    X_test = (
+        test_mat["X"].transpose(3, 0, 1, 2).reshape(-1, 3072).astype(np.float64) / 255.0
+    )
+    Y_train = train_mat["y"].ravel().astype(int) % 10
+    Y_test = test_mat["y"].ravel().astype(int) % 10
+
+    return X_train, X_test, Y_train, Y_test
+
+
+# ---------------------------------------------------------------------------
+# Encoding & kernel
+# ---------------------------------------------------------------------------
+
+NUM_QUBITS = 12
+STATE_DIM = 2**NUM_QUBITS  # 4096
+CLASS_POS = 1
+CLASS_NEG = 7
+
+
+def _filter_binary(X, Y):
+    mask = (Y == CLASS_POS) | (Y == CLASS_NEG)
+    return X[mask], np.where(Y[mask] == CLASS_POS, 1, -1)
+
+
+def encode_qdp(X: np.ndarray, device_id: int = 0) -> torch.Tensor:
+    """QdpEngine amplitude encode → CUDA float64 tensor (n, 4096)."""
+    engine = QdpEngine(device_id=device_id, precision="float64")
+    qt = engine.encode(
+        X.astype(np.float64),
+        num_qubits=NUM_QUBITS,
+        encoding_method="amplitude",
+    )
+    encoded = torch.from_dlpack(qt)
+    if encoded.is_complex():
+        encoded = encoded.real
+    return encoded[: X.shape[0]]
+
+
+def compute_kernel_gpu(X1: torch.Tensor, X2: torch.Tensor) -> np.ndarray:
+    """Quantum kernel on GPU: K[i,j] = (X1 @ X2.T)². Returns CPU numpy."""
+    K = torch.mm(X1, X2.T)
+    K = K**2
+    return K.cpu().numpy()
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Quantum Kernel SVM — QDP pipeline (GPU) — SVHN (12 qubits)"
+    )
+    parser.add_argument(
+        "--n-samples",
+        type=int,
+        default=5000,
+        help="Total samples for CV (default: 5000)",
+    )
+    parser.add_argument("--folds", type=int, default=5, help="CV folds (default: 5)")
+    parser.add_argument(
+        "--seed", type=int, default=42, help="Random seed (default: 42)"
+    )
+    parser.add_argument(
+        "--svm-c",
+        type=float,
+        default=100.0,
+        help="SVM regularisation C (default: 100.0)",
+    )
+    parser.add_argument(
+        "--device-id", type=int, default=0, help="CUDA device (default: 0)"
+    )
+    parser.add_argument("--data-home", type=str, default=None, help="Data cache dir")
+    args = parser.parse_args()
+
+    print("Quantum Kernel SVM — QDP pipeline (GPU) — SVHN")
+    print(
+        f"  {NUM_QUBITS} qubits, {STATE_DIM}-dim state, binary: digit {CLASS_POS} vs {CLASS_NEG}"
+    )
+    print(f"  n_samples={args.n_samples}, {args.folds}-fold CV, C={args.svm_c}")
+    print(f"  CUDA: {torch.cuda.is_available()}, device_id: {args.device_id}")
+    print()
+
+    # Load & filter
+    print("  Loading SVHN ...")
+    X_train_all, X_test_all, Y_train_all, Y_test_all = load_svhn(
+        data_home=args.data_home
+    )
+    X_all = np.concatenate([X_train_all, X_test_all], axis=0)
+    Y_all = np.concatenate([Y_train_all, Y_test_all], axis=0)
+    X_bin, Y_bin = _filter_binary(X_all, Y_all)
+    print(f"  Binary filtered: {len(Y_bin):,} samples (pos={np.mean(Y_bin == 1):.2f})")
+
+    rng = np.random.default_rng(args.seed)
+    if args.n_samples < len(Y_bin):
+        idx = rng.choice(len(Y_bin), size=args.n_samples, replace=False)
+        X_bin, Y_bin = X_bin[idx], Y_bin[idx]
+    print(f"  Subsampled: {len(Y_bin):,} samples")
+    print()
+
+    # Step 1: StandardScaler + Encode (GPU)
+    torch.cuda.synchronize()
+    t0 = time.perf_counter()
+    scaler = StandardScaler().fit(X_bin)
+    X_scaled = scaler.transform(X_bin)
+    X_encoded = encode_qdp(X_scaled, args.device_id)
+    torch.cuda.synchronize()
+    encode_sec = time.perf_counter() - t0
+    print(
+        f"  Step 1: Scale+Encode ........  {encode_sec:.4f}s  (n={len(Y_bin)}, dim={STATE_DIM}, device={X_encoded.device})"
+    )
+
+    # Step 2: Full kernel matrix (GPU matmul → CPU)
+    torch.cuda.synchronize()
+    t0 = time.perf_counter()
+    K_full = compute_kernel_gpu(X_encoded, X_encoded)
+    kernel_sec = time.perf_counter() - t0
+    print(
+        f"  Step 2: Kernel      ........  {kernel_sec:.4f}s  ({K_full.shape[0]}×{K_full.shape[1]})"
+    )
+
+    # Free GPU memory
+    del X_encoded
+    torch.cuda.empty_cache()
+
+    # Step 3: k-fold cross-validation
+    from sklearn.model_selection import StratifiedKFold
+
+    skf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.seed)
+
+    fold_accs = []
+    cv_fit_sec = 0.0
+    cv_pred_sec = 0.0
+
+    print(f"\n  Step 3: {args.folds}-fold Cross-Validation")
+    for fold, (train_idx, test_idx) in enumerate(skf.split(K_full, Y_bin), 1):
+        K_train = K_full[np.ix_(train_idx, train_idx)]
+        K_test = K_full[np.ix_(test_idx, train_idx)]
+
+        t0 = time.perf_counter()
+        svm = SVC(kernel="precomputed", C=args.svm_c)
+        svm.fit(K_train, Y_bin[train_idx])
+        cv_fit_sec += time.perf_counter() - t0
+
+        t0 = time.perf_counter()
+        acc = svm.score(K_test, Y_bin[test_idx])
+        cv_pred_sec += time.perf_counter() - t0
+
+        fold_accs.append(acc)
+        n_sv = svm.n_support_.sum()
+        print(
+            f"    Fold {fold}/{args.folds}: acc={acc:.4f}  "
+            f"(train={len(train_idx)}, test={len(test_idx)}, SVs={n_sv})"
+        )
+
+    mean_acc = np.mean(fold_accs)
+    std_acc = np.std(fold_accs)
+
+    total_sec = encode_sec + kernel_sec + cv_fit_sec + cv_pred_sec
+    encode_pct = encode_sec / total_sec * 100
+
+    print(f"\n  {'─' * 50}")
+    print(f"  Encode time:        ........  {encode_sec:.4f}s")
+    print(f"  Kernel time:        ........  {kernel_sec:.4f}s")
+    print(f"  CV fit time:        ........  {cv_fit_sec:.4f}s  ({args.folds} folds)")
+    print(f"  CV predict time:    ........  {cv_pred_sec:.4f}s")
+    print(f"  Total:              ........  {total_sec:.4f}s")
+    print(f"  Encoding fraction:  ........  {encode_pct:.1f}%")
+    print(f"  Accuracy:           ........  {mean_acc:.4f} ± {std_acc:.4f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/uv.lock b/uv.lock
index 678148684..36e21735a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2076,12 +2076,8 @@ dev = [
 [[package]]
 name = "qumat-qdp"
 source = { editable = "qdp/qdp-python" }
-dependencies = [
-    { name = "qumat" },
-]
 
 [package.metadata]
-requires-dist = [{ name = "qumat", editable = "." }]
 
 [package.metadata.requires-dev]
 benchmark = [

From b2227b48499107119ea3d6d2acbc6fe2ff856f03 Mon Sep 17 00:00:00 2001
From: rich7420 <rc910420@gmail.com>
Date: Thu, 12 Mar 2026 22:18:06 +0800
Subject: [PATCH 2/2] move to right side

---
 .../{pennylane_baseline => cpu_baseline}/svhn_kernel_amplitude.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename qdp/qdp-python/benchmark/encoding_benchmarks/{pennylane_baseline => cpu_baseline}/svhn_kernel_amplitude.py (100%)

diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py b/qdp/qdp-python/benchmark/encoding_benchmarks/cpu_baseline/svhn_kernel_amplitude.py
similarity index 100%
rename from qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/svhn_kernel_amplitude.py
rename to qdp/qdp-python/benchmark/encoding_benchmarks/cpu_baseline/svhn_kernel_amplitude.py