diff --git a/.gitignore b/.gitignore index 51efb17c..d3875662 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # project/repo specific +conf.py advanced/backends/*.bin scipy-tutorial/dask-report-large-chunk.html mydask.png diff --git a/README.md b/README.md index acf1078b..bca652ed 100644 --- a/README.md +++ b/README.md @@ -45,24 +45,20 @@ pixi run tutorial ### Building the Documentation Locally -To build and serve the tutorial website locally as a development server: - -1. Build the Jupyter Book: +To build and serve the tutorial website locally with live reload: ```bash -pixi run build +pixi run watch ``` -2. Serve the built site locally: +This watches for changes, rebuilds, and serves at http://localhost:8000. + +To build without serving: ```bash -pixi run python -m http.server --directory _build/html +pixi run build ``` -Then visit http://localhost:8000 in your browser. - -Note: The `build` command does not have live rebuilding. You'll need to re-run `pixi run build` after making changes to see updates. - ## Contributing Contributions are welcome and greatly appreciated! See our [CONTRIBUTING.md](./CONTRIBUTING.md) document. 
diff --git a/_toc.yml b/_toc.yml index 0a387442..793de3aa 100644 --- a/_toc.yml +++ b/_toc.yml @@ -66,6 +66,9 @@ parts: - caption: Advanced chapters: + - file: advanced/indexing/indexing.md + sections: + - file: advanced/indexing/why-trees.md - file: advanced/parallel-intro.md - file: advanced/apply_ufunc/apply_ufunc.md sections: diff --git a/advanced/indexing/indexing.md b/advanced/indexing/indexing.md new file mode 100644 index 00000000..72dae2a4 --- /dev/null +++ b/advanced/indexing/indexing.md @@ -0,0 +1,5 @@ +# Indexing + +```{tableofcontents} + +``` diff --git a/advanced/indexing/why-trees.md b/advanced/indexing/why-trees.md new file mode 100644 index 00000000..18565713 --- /dev/null +++ b/advanced/indexing/why-trees.md @@ -0,0 +1,628 @@ +--- +jupytext: + formats: ipynb,md:myst + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.19.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Tree-Based Indexing + +```{seealso} +[NDPointIndex](https://xarray-indexes.readthedocs.io/blocks/ndpoint.html) — use KD-trees and Ball trees with xarray's indexing system for efficient nearest-neighbor lookups on real datasets. +``` + +Imagine you have measurements at irregular locations and want to find the **nearest** data point to your query location. + +**In this notebook you'll learn:** + +- Why naive nearest-neighbor search is slow (O(n) comparisons) +- How KD-trees speed this up dramatically (O(log n) comparisons) +- Why KD-trees can give wrong answers for geographic lat/lon data +- When to use a Ball tree instead + ++++ + +## The nearest neighbor problem in 1D + +Let's start with a simple 1D example: + +**The problem:** What temperature is it at 4.7 km? We need to find the nearest measurement. 
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Temperature measurements at 7 locations along a transect.
+# `locations` is reused by the following cells.
+locations = np.array([1, 3, 4, 7, 8, 9, 12])
+temperatures = np.array([15, 18, 17, 22, 24, 23, 19])
+
+# Plot the data on a number line (y is a dummy axis)
+fig, ax = plt.subplots(figsize=(10, 3))
+ax.scatter(locations, np.zeros_like(locations), s=100, c='blue', zorder=5)
+for loc, temp in zip(locations, temperatures):
+    ax.annotate(f'{temp}°', (loc, 0.15), ha='center', fontsize=10)
+ax.set_xlim(0, 14)
+ax.set_ylim(-0.5, 0.8)
+ax.set_xlabel('Location (km)')
+ax.set_yticks([])
+ax.set_title('Temperature measurements at 7 irregular locations')
+plt.tight_layout()
+plt.show()
+```
+
+The naive approach checks the distance to every point:
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+# === Configuration: change this to explore different queries ===
+# `query` is also used by the KD-tree cell below.
+query = 4.7
+
+# Naive approach: check distance to EVERY point
+distances = np.abs(locations - query)  # computed once, reused for the summary below
+
+fig, ax = plt.subplots(figsize=(10, 4))
+
+# Draw the data points on the number line
+ax.scatter(locations, np.zeros_like(locations), s=100, c='blue', zorder=5)
+ax.scatter(query, 0, s=150, c='red', marker='x', zorder=10, lw=3)
+ax.axhline(0, color='black', lw=0.5, zorder=1)
+
+# Draw horizontal distance lines - stacked vertically for visibility
+for i, loc in enumerate(locations):
+    y_offset = 0.12 * (i + 1)
+    # Horizontal line showing the distance
+    ax.plot([query, loc], [y_offset, y_offset], 'gray', alpha=0.7, lw=2)
+    # Vertical ticks at endpoints
+    ax.plot([query, query], [y_offset - 0.03, y_offset + 0.03], 'gray', alpha=0.7, lw=1)
+    ax.plot([loc, loc], [y_offset - 0.03, y_offset + 0.03], 'gray', alpha=0.7, lw=1)
+    # Label
+    ax.annotate(f'{distances[i]:.1f} km', ((query + loc)/2, y_offset + 0.04),
+                ha='center', fontsize=8, color='gray')
+
+ax.set_xlim(0, 14)
+ax.set_ylim(-0.2, 1.1)
+ax.set_xlabel('Location (km)')
+ax.set_yticks([])
+ax.set_title(f'Naive search: compute distance to ALL {len(locations)} points (query={query})')
+plt.tight_layout()
+plt.show()
+
+print(f"Query: {query} km")
+print(f"Nearest point: {locations[np.argmin(distances)]} km (distance = {np.min(distances):.1f} km)")
+print(f"Comparisons needed: {len(locations)}")
+```
+
+With 7 points this is fine, but with millions of points this becomes slow.
+
+**The solution:** Pre-compute a tree structure that partitions the space. In 1D, this is essentially a binary search tree - each split divides the remaining points in half:
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+from scipy.spatial import KDTree
+from matplotlib.patches import Rectangle
+
+# This cell reuses `locations` and `query` from the cells above.
+
+# Build the tree (this is the pre-computation step)
+tree = KDTree(locations.reshape(-1, 1))
+
+# Query the tree first to get the result
+dist, idx = tree.query([[query]])
+nearest = locations[idx[0]]
+
+# Map from value to node name for finding the result node.
+# The tree drawn below is a hand-built illustration for these 7 values.
+value_to_node = {1: 'LL', 3: 'L1', 4: 'LR', 7: 'root', 8: 'RL', 9: 'R1', 12: 'RR'}
+found_node = value_to_node[int(nearest)]  # plain int key, not a NumPy scalar
+
+# Determine the search path based on query value
+if query < 7:
+    if query < 3:
+        path_nodes = ['root', 'L1', 'LL']
+        regions = [(0, 14), (0, 7), (0, 3)]
+    else:
+        path_nodes = ['root', 'L1', 'LR']
+        regions = [(0, 14), (0, 7), (3, 7)]
+else:
+    if query < 9:
+        path_nodes = ['root', 'R1', 'RL']
+        regions = [(0, 14), (7, 14), (7, 9)]
+    else:
+        path_nodes = ['root', 'R1', 'RR']
+        regions = [(0, 14), (7, 14), (9, 14)]
+
+# Decision labels for the two splits on the path.
+# The "right" branch is taken when query >= split, so it is labelled with ≥.
+if query < 7:
+    root_decision = f'{query} < 7? → go left'
+    child_split = 3
+else:
+    root_decision = f'{query} ≥ 7? → go right'
+    child_split = 9
+if query < child_split:
+    child_decision = f'{query} < {child_split}? → go left'
+else:
+    child_decision = f'{query} ≥ {child_split}? → go right'
+
+# Create visualization: tree on left, 3 narrowing steps on right
+fig = plt.figure(figsize=(16, 9))
+
+# Left side: Tree diagram with spatial ranges
+ax_tree = fig.add_subplot(1, 2, 1)
+ax_tree.set_xlim(0, 16)
+ax_tree.set_ylim(-0.5, 5.5)
+ax_tree.axis('off')
+ax_tree.set_title('KD-tree structure\n(each node shows the spatial range it covers)', fontsize=12, fontweight='bold')
+
+# Tree node positions - now includes spatial range for each node
+nodes = {
+    'root': {'pos': (8, 4.5), 'value': 7, 'color': 'steelblue', 'label': 'split=7', 'range': '[0, 14]'},
+    'L1': {'pos': (4, 2.6), 'value': 3, 'color': 'coral', 'label': 'split=3', 'range': '[0, 7)'},
+    'R1': {'pos': (12, 2.6), 'value': 9, 'color': 'seagreen', 'label': 'split=9', 'range': '[7, 14]'},
+    'LL': {'pos': (2, 0.8), 'value': 1, 'color': 'gray', 'label': '1', 'range': '[0, 3)'},
+    'LR': {'pos': (6, 0.8), 'value': 4, 'color': 'gray', 'label': '4', 'range': '[3, 7)'},
+    'RL': {'pos': (10, 0.8), 'value': 8, 'color': 'gray', 'label': '8', 'range': '[7, 9)'},
+    'RR': {'pos': (14, 0.8), 'value': 12, 'color': 'gray', 'label': '12', 'range': '[9, 14]'},
+}
+
+# Draw edges
+edges = [('root', 'L1'), ('root', 'R1'), ('L1', 'LL'), ('L1', 'LR'), ('R1', 'RL'), ('R1', 'RR')]
+for parent, child in edges:
+    px, py = nodes[parent]['pos']
+    cx, cy = nodes[child]['pos']
+    ax_tree.plot([px, cx], [py, cy], 'k-', lw=2, zorder=1)
+
+# Draw nodes with spatial range labels
+for name, node in nodes.items():
+    x, y = node['pos']
+    is_split = 'split' in node['label']
+    size = 2200 if is_split else 1500
+    ax_tree.scatter(x, y, s=size, c=node['color'], zorder=5, edgecolors='black', linewidths=2)
+    ax_tree.annotate(node['label'], (x, y), ha='center', va='center',
+                     fontsize=11 if is_split else 10, fontweight='bold', color='white')
+    # Add range label below each node
+    ax_tree.annotate(node['range'], (x, y - 0.55), ha='center', va='top',
+                     fontsize=9, color='black', style='italic',
+                     bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='gray', alpha=0.8))
+
+# Highlight the path taken
+for i in range(len(path_nodes) - 1):
+    px, py = nodes[path_nodes[i]]['pos']
+    cx, cy = nodes[path_nodes[i+1]]['pos']
+    ax_tree.plot([px, cx], [py, cy], 'r-', lw=5, alpha=0.4, zorder=2)
+
+# Add query annotation
+ax_tree.annotate(f'query={query}', (8, 4.5), xytext=(11, 5.2),
+                 fontsize=11, color='red', fontweight='bold',
+                 arrowprops=dict(arrowstyle='->', color='red', lw=2))
+
+# Mark the found node
+found_x, found_y = nodes[found_node]['pos']
+ax_tree.annotate(f'found {nearest}!', (found_x + 1.2, found_y + 0.3), fontsize=11, ha='left', color='red', fontweight='bold')
+
+# Right side: 3 subplots showing narrowing search space
+steps = [
+    ("Step 1: Start with all points", regions[0], 'steelblue', root_decision),
+    ("Step 2: After first split", regions[1], 'coral', child_decision),
+    (f"Step 3: Found nearest = {nearest}", regions[2], 'gold', None),
+]
+
+for i, (title, (region_start, region_end), color, annotation) in enumerate(steps):
+    ax = fig.add_subplot(3, 2, 2*(i+1))
+
+    # Draw all data points.
+    # Both region bounds are inclusive, so a point sitting on a split stays highlighted.
+    for loc in locations:
+        in_region = region_start <= loc <= region_end
+        ax.scatter(loc, 0, s=100 if in_region else 60,
+                   c='blue' if in_region else 'lightgray',
+                   zorder=5, edgecolors='black' if in_region else 'gray', linewidths=1)
+        if in_region:
+            ax.annotate(f'{loc}', (loc, -0.25), ha='center', fontsize=9, fontweight='bold')
+
+    # Draw query point
+    ax.scatter(query, 0, s=150, c='red', marker='x', zorder=10, lw=3)
+
+    # Highlight the active region
+    rect = Rectangle((region_start, -0.15), region_end - region_start, 0.3,
+                     fill=True, facecolor=color, alpha=0.2, edgecolor=color, lw=2, zorder=2)
+    ax.add_patch(rect)
+
+    # Draw split lines
+    if i == 0:
+        ax.axvline(7, color='steelblue', lw=2, ls='--', alpha=0.8)
+        ax.annotate('split=7', (7, 0.25), ha='center', fontsize=9, color='steelblue', fontweight='bold')
+    elif i == 1:
+        if query < 7:
+            ax.axvline(3, color='coral', lw=2, ls='--', alpha=0.8)
+            ax.annotate('split=3', (3, 0.25), ha='center', fontsize=9, color='coral', fontweight='bold')
+        else:
+            ax.axvline(9, color='seagreen', lw=2, ls='--', alpha=0.8)
+            ax.annotate('split=9', (9, 0.25), ha='center', fontsize=9, color='seagreen', fontweight='bold')
+
+    # Add decision annotation
+    if annotation:
+        ax.annotate(annotation, (0.98, 0.95), xycoords='axes fraction', ha='right', va='top',
+                    fontsize=10, color='darkgreen', fontweight='bold',
+                    bbox=dict(boxstyle='round', facecolor='lightyellow', edgecolor='green', alpha=0.8))
+
+    ax.set_xlim(-0.5, 14.5)
+    ax.set_ylim(-0.4, 0.45)
+    ax.set_title(title, fontsize=11, fontweight='bold')
+    ax.set_yticks([])
+    if i == 2:
+        ax.set_xlabel('Location (km)', fontsize=10)
+
+plt.tight_layout()
+plt.show()
+
+print(f"Nearest point: {nearest} km")
+print(f"Comparisons needed: ~{len(path_nodes)} (log₂({len(locations)}) ≈ {np.log2(len(locations)):.0f})")
+```
+
+## Extending to 2D
+
+The same idea works in higher dimensions. Now our measurements are scattered across a 2D area:
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+# 2D example: temperature measurements scattered across an area
+from matplotlib.patches import Rectangle
+
+np.random.seed(42)
+points_2d = np.random.rand(20, 2) * 10  # 20 points in a 10x10 area
+
+# === Configuration ===
+query_2d = np.array([6.5, 4.0])  # Change this to query a different location
+
+# Build tree - using leafsize=2 to demonstrate meaningful subdivision
+# (default leafsize=10 would barely split with only 20 points!)
+LEAFSIZE = 2
+tree_2d = KDTree(points_2d, leafsize=LEAFSIZE)
+dist, idx = tree_2d.query([query_2d])
+nearest_2d = points_2d[idx[0]]
+
+# With leafsize=2, we get ~4 levels of splits (log2(20/2) ≈ 3-4)
+# Let's show the first 2 splits conceptually, then the final leaf comparison
+
+# Approximate the splits with medians (a textbook KD-tree alternates x, y, x, y...).
+# NOTE: this is an illustration; SciPy's own split rule may differ, and the true
+# nearest neighbour returned by the tree can fall outside the highlighted quadrant.
+x_split = np.median(points_2d[:, 0])
+
+# Determine which half based on query x
+if query_2d[0] >= x_split:
+    # Right half
+    half_points = points_2d[points_2d[:, 0] >= x_split]
+    x_decision = f"x={query_2d[0]} ≥ {x_split:.1f}? → go right"
+    x_region = (x_split, 0, 10, 10)  # (x_min, y_min, x_max, y_max)
+else:
+    # Left half
+    half_points = points_2d[points_2d[:, 0] < x_split]
+    x_decision = f"x={query_2d[0]} < {x_split:.1f}? → go left"
+    x_region = (0, 0, x_split, 10)
+
+y_split = np.median(half_points[:, 1])
+
+# Determine which quadrant based on query y
+if query_2d[1] >= y_split:
+    # Upper region
+    y_decision = f"y={query_2d[1]} ≥ {y_split:.1f}? → go up"
+    if query_2d[0] >= x_split:
+        final_region = (x_split, y_split, 10, 10)  # top-right
+    else:
+        final_region = (0, y_split, x_split, 10)  # top-left
+else:
+    # Lower region
+    y_decision = f"y={query_2d[1]} < {y_split:.1f}? → go down"
+    if query_2d[0] >= x_split:
+        final_region = (x_split, 0, 10, y_split)  # bottom-right
+    else:
+        final_region = (0, 0, x_split, y_split)  # bottom-left
+
+# Define regions for visualization
+regions = [
+    (0, 0, 10, 10),  # Step 1: all points
+    x_region,        # Step 2: half based on x
+    final_region,    # Step 3: quadrant based on y
+]
+
+# Get actual points in final region (these are the leaf candidates)
+x_min, y_min, x_max, y_max = final_region
+final_candidates = [pt for pt in points_2d
+                    if x_min <= pt[0] <= x_max and y_min <= pt[1] <= y_max]
+
+# Create figure
+fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+axes = axes.flatten()
+
+step_titles = [
+    f"Step 1: Start with all {len(points_2d)} points",
+    f"Step 2: Split on x ≈ {x_split:.1f}",
+    f"Step 3: Split on y ≈ {y_split:.1f}",
+    f"Step 4: Compare {len(final_candidates)} candidates in leaf"
+]
+step_colors = ['steelblue', 'coral', 'gold', 'limegreen']
+# Each panel shows the decision that leads to the NEXT panel
+decisions = [x_decision, y_decision, None, None]
+
+for i, ax in enumerate(axes):
+    # Panels 3 and 4 both use the final quadrant
+    x_min, y_min, x_max, y_max = regions[min(i, 2)]
+
+    # Get points in current region
+    points_in_region = [(pt, x_min <= pt[0] <= x_max and y_min <= pt[1] <= y_max) for pt in points_2d]
+
+    # Draw all points
+    for pt, in_region in points_in_region:
+        ax.scatter(pt[0], pt[1], s=80 if in_region else 40,
+                   c='blue' if in_region else 'lightgray',
+                   edgecolors='black' if in_region else 'gray',
+                   zorder=5, linewidths=1)
+
+    # Draw query point
+    ax.scatter(*query_2d, s=150, c='red', marker='x', zorder=10, lw=3)
+
+    # Draw the active region
+    rect = Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
+                     fill=True, facecolor=step_colors[i], alpha=0.15,
+                     edgecolor=step_colors[i], lw=2, zorder=2)
+    ax.add_patch(rect)
+
+    # Draw split lines
+    if i >= 1:
+        ax.axvline(x_split, color='steelblue', lw=2, ls='--', alpha=0.8)
+        ax.annotate(f'x={x_split:.1f}', (x_split, 9.7), ha='center', fontsize=9,
+                    color='steelblue', fontweight='bold')
+    if i >= 2:
+        # Only draw y split line in the relevant half
+        if query_2d[0] >= x_split:
+            ax.axhline(y_split, xmin=x_split/10, xmax=1, color='coral', lw=2, ls='--', alpha=0.8)
+        else:
+            ax.axhline(y_split, xmin=0, xmax=x_split/10, color='coral', lw=2, ls='--', alpha=0.8)
+        ax.annotate(f'y={y_split:.1f}', (9.7, y_split + 0.2),
+                    ha='right', va='bottom', fontsize=9, color='coral', fontweight='bold')
+
+    # Final step: draw lines to ALL candidates
+    if i == 3:
+        for pt, in_region in points_in_region:
+            if in_region:
+                is_nearest = np.allclose(pt, nearest_2d)
+                ax.plot([query_2d[0], pt[0]], [query_2d[1], pt[1]],
+                        color='limegreen' if is_nearest else 'gray',
+                        lw=3 if is_nearest else 1.5,
+                        alpha=1.0 if is_nearest else 0.6,
+                        zorder=6 if is_nearest else 4)
+
+        ax.scatter(*nearest_2d, s=200, facecolors='none', edgecolors='limegreen', lw=3, zorder=15)
+        ax.annotate('nearest!', (nearest_2d[0] + 0.3, nearest_2d[1] + 0.3),
+                    ha='left', fontsize=10, color='green', fontweight='bold')
+
+    # Add decision annotation
+    if decisions[i]:
+        ax.annotate(decisions[i], (0.98, 0.98), xycoords='axes fraction',
+                    ha='right', va='top', fontsize=10, color='darkgreen', fontweight='bold',
+                    bbox=dict(boxstyle='round', facecolor='lightyellow', edgecolor='green', alpha=0.9))
+
+    ax.set_xlim(-0.5, 10.5)
+    ax.set_ylim(-0.5, 10.5)
+    ax.set_aspect('equal')
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_title(step_titles[i], fontsize=11, fontweight='bold')
+
+plt.tight_layout()
+plt.show()
+
+print(f"Query: ({query_2d[0]}, {query_2d[1]})")
+print(f"KDTree built with leafsize={LEAFSIZE}")
+print(f"Started with {len(points_2d)} points")
+print(f"After 2 tree splits: narrowed to {len(final_candidates)} candidates")
+print(f"Final step: {len(final_candidates)} distance calculations")
+print(f"Total: 2 splits + {len(final_candidates)} comparisons = {2 + len(final_candidates)} operations (vs {len(points_2d)} naive)")
+```
+
+## How it scales
+
+The same principle extends to 3D, 4D, and beyond, although the speed-up fades in very high dimensions. Here's how the number of comparisons scales:
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+# How comparisons scale with data size (idealized counts, not measured timings)
+data_sizes = np.array([10, 100, 1000, 10000, 100000, 1000000])
+naive_comparisons = data_sizes  # O(n)
+kdtree_comparisons = np.log2(data_sizes).astype(int) + 1  # O(log n)
+
+fig, ax = plt.subplots(figsize=(10, 5))
+
+ax.plot(data_sizes, naive_comparisons, 'o-', color='gray', lw=2, markersize=8, label='Naive: O(n)')
+ax.plot(data_sizes, kdtree_comparisons, 's-', color='steelblue', lw=2, markersize=8, label='KD-tree: O(log n)')
+
+ax.set_xscale('log')
+ax.set_yscale('log')
+ax.set_xlabel('Number of data points')
+ax.set_ylabel('Comparisons per query')
+ax.set_title('Finding nearest neighbor: Naive vs KD-tree')
+ax.legend(fontsize=11)
+ax.grid(True, alpha=0.3)
+
+# Annotate key points (every other size, to avoid clutter)
+for n, naive, kd in zip(data_sizes[::2], naive_comparisons[::2], kdtree_comparisons[::2]):
+    ax.annotate(f'{naive:,}', (n, naive), textcoords="offset points", xytext=(0,10), ha='center', fontsize=9, color='gray')
+    ax.annotate(f'{kd}', (n, kd), textcoords="offset points", xytext=(0,-15), ha='center', fontsize=9, color='steelblue')
+
+plt.tight_layout()
+plt.show()
+
+# Report the largest case from the arrays above instead of hard-coding it
+print(f"With {data_sizes[-1]:,} points: naive needs {naive_comparisons[-1]:,} comparisons, KD-tree needs ~{kdtree_comparisons[-1]}")
+```
+
+(the-problem-with-geographic-coordinates)=
+
+## The problem with geographic coordinates
+
+KD-trees use **Euclidean distance**—they measure straight-line distance in whatever coordinate system you give them. This works perfectly for x/y coordinates in meters or kilometers.
+
+But for **latitude/longitude coordinates**, Euclidean distance over degrees is wrong! Here's why:
+
+- **Latitude degrees are constant**: 1° latitude ≈ 111 km everywhere on Earth
+- **Longitude degrees shrink toward the poles**: 1° longitude ≈ 111 km at the equator, but only ~19 km at 80°N
+
+This means a KD-tree treating lat/lon as flat coordinates can pick the **wrong** nearest neighbor, and the distortion grows toward the poles:
+
+```{code-cell} ipython3
+---
+tags: [hide-input]
+---
+# Visualize haversine vs Euclidean - 2D circle diagram
+# (reuses np, plt and KDTree imported in earlier cells)
+from sklearn.neighbors import BallTree
+
+# At 80°N (near Arctic): longitude degrees are MUCH shorter!
+lat = 80
+km_per_deg_lat = 111  # always ~111 km
+km_per_deg_lon = km_per_deg_lat * np.cos(np.radians(lat))  # ~19 km at 80°N!
+
+# Query point and two candidates (in lat/lon degrees, ordered [lat, lon])
+query_latlon = np.array([[lat, 0]])
+point_a_latlon = np.array([[lat, 2.0]])
+point_b_latlon = np.array([[lat + 0.5, 0]])
+points_latlon = np.vstack([point_a_latlon, point_b_latlon])
+
+# Test both trees (row 0 of points_latlon is A, row 1 is B)
+kd_tree = KDTree(points_latlon)
+kd_dist, kd_idx = kd_tree.query(query_latlon)
+kd_picked = "A" if kd_idx[0] == 0 else "B"
+
+# The haversine metric takes [lat, lon] in radians
+ball_tree = BallTree(np.radians(points_latlon), metric='haversine')
+ball_dist, ball_idx = ball_tree.query(np.radians(query_latlon))
+# BallTree.query returns 2-D (n_queries, k) arrays, so index the single scalar
+ball_picked = "A" if ball_idx[0, 0] == 0 else "B"
+
+# Flat-Earth approximation of the two offsets in km (good enough for the plot)
+km_to_a = 2.0 * km_per_deg_lon
+km_to_b = 0.5 * km_per_deg_lat
+
+# Create figure
+fig = plt.figure(figsize=(14, 10), constrained_layout=True)
+
+# === Top: Circle diagram showing arc vs chord ===
+ax_circle = fig.add_subplot(211)
+ax_circle.set_aspect('equal')
+ax_circle.axis('off')
+
+# Draw circle
+radius = 1
+theta_full = np.linspace(0, 2*np.pi, 100)
+ax_circle.plot(radius * np.cos(theta_full), radius * np.sin(theta_full), 'k-', lw=2.5)
+
+# Two points on the circle
+theta_p = np.radians(120)  # Point P
+theta_q = np.radians(60)   # Point Q
+
+p_x, p_y = radius * np.cos(theta_p), radius * np.sin(theta_p)
+q_x, q_y = radius * np.cos(theta_q), radius * np.sin(theta_q)
+
+# Draw the ARC (haversine distance) - along the circle surface
+arc_theta = np.linspace(theta_q, theta_p, 50)
+arc_x = radius * np.cos(arc_theta)
+arc_y = radius * np.sin(arc_theta)
+ax_circle.plot(arc_x, arc_y, 'r-', lw=6, solid_capstyle='round', label='Arc length (haversine)', zorder=5)
+
+# Draw the CHORD (Euclidean distance) - straight line
+ax_circle.plot([p_x, q_x], [p_y, q_y], 'b--', lw=4, label='Chord length (Euclidean)', zorder=4)
+
+# Draw points
+ax_circle.scatter([p_x, q_x], [p_y, q_y], s=250, c='dodgerblue', edgecolors='black', lw=2, zorder=10)
+
+# Labels
+ax_circle.annotate('P', (p_x - 0.18, p_y + 0.08), fontsize=24, fontweight='bold')
+ax_circle.annotate('Q', (q_x + 0.1, q_y + 0.08), fontsize=24, fontweight='bold')
+
+# Distance annotations - positioned to avoid overlap
+# Arc annotation (above the arc)
+arc_mid_theta = (theta_p + theta_q) / 2
+arc_mid_x = 1.22 * np.cos(arc_mid_theta)
+arc_mid_y = 1.22 * np.sin(arc_mid_theta)
+ax_circle.annotate('arc length\n(along surface)', (arc_mid_x, arc_mid_y + 0.05),
+                   fontsize=13, color='darkred', fontweight='bold', ha='center',
+                   bbox=dict(boxstyle='round,pad=0.3', facecolor='mistyrose', edgecolor='red', alpha=0.9))
+
+# Chord annotation (below the chord)
+chord_mid_x = (p_x + q_x) / 2
+chord_mid_y = (p_y + q_y) / 2
+ax_circle.annotate('chord\n(straight line)', (chord_mid_x, chord_mid_y - 0.25),
+                   fontsize=13, color='darkblue', fontweight='bold', ha='center',
+                   bbox=dict(boxstyle='round,pad=0.3', facecolor='lightblue', edgecolor='blue', alpha=0.9))
+
+# Calculate actual distances for display
+arc_length = radius * abs(theta_p - theta_q)  # s = r * theta
+chord_length = np.sqrt((q_x - p_x)**2 + (q_y - p_y)**2)
+
+ax_circle.set_xlim(-1.5, 1.5)
+ax_circle.set_ylim(-1.0, 1.7)
+ax_circle.set_title('Haversine vs Euclidean: arc length ≠ chord length',
+                    fontsize=16, fontweight='bold', pad=15)
+
+# Legend on the left side
+ax_circle.legend(loc='center left', fontsize=11, bbox_to_anchor=(-0.15, 0.3))
+
+# Formula box on the right side
+formula_text = f"Arc length > Chord length\n(arc = {arc_length:.2f}, chord = {chord_length:.2f})"
+ax_circle.text(1.15, 0.3, formula_text, fontsize=12, ha='center',
+               bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.9))
+
+# === Bottom row: Side-by-side comparison ===
+# Bottom-left: KD-tree view (flat degrees)
+ax_kd = fig.add_subplot(223)
+ax_kd.scatter(0, 0, s=200, c='red', marker='x', zorder=10, lw=3, label='Query')
+ax_kd.scatter(2.0, 0, s=140, c='green', zorder=5, label='Point A (2° east)')
+ax_kd.scatter(0, 0.5, s=140, c='orange', zorder=5, label='Point B (0.5° north)')
+ax_kd.plot([0, 2.0], [0, 0], 'g-', lw=2.5, alpha=0.7)
+ax_kd.plot([0, 0], [0, 0.5], color='orange', lw=2.5, alpha=0.7)
+
+# Dashed ring marks the point the KD-tree selects (B)
+circle_kd = plt.Circle((0, 0.5), 0.15, fill=False, color='black', lw=3, linestyle='--', zorder=15)
+ax_kd.add_patch(circle_kd)
+ax_kd.annotate('KD-tree picks B\n(smaller in degrees)', (0.3, 0.75), fontsize=12, fontweight='bold')
+
+ax_kd.set_xlabel('Longitude offset (°)', fontsize=13)
+ax_kd.set_ylabel('Latitude offset (°)', fontsize=13)
+ax_kd.set_title(f'KD-tree: Euclidean on degrees\n(at {lat}°N latitude)', fontsize=13, fontweight='bold')
+ax_kd.legend(loc='upper right', fontsize=10)
+ax_kd.set_xlim(-0.5, 2.5)
+ax_kd.set_ylim(-0.5, 1.2)
+ax_kd.set_aspect('equal')
+ax_kd.grid(True, alpha=0.3)
+ax_kd.annotate('2.0°', (1.0, -0.15), ha='center', fontsize=14, color='green', fontweight='bold')
+ax_kd.annotate('0.5°', (-0.25, 0.25), ha='center', fontsize=14, color='orange', fontweight='bold', rotation=90)
+
+# Bottom-right: Reality in kilometers
+ax_real = fig.add_subplot(224)
+ax_real.scatter(0, 0, s=200, c='red', marker='x', zorder=10, lw=3, label=f'Query ({lat}°N)')
+ax_real.scatter(km_to_a, 0, s=140, c='green', zorder=5, label='Point A')
+ax_real.scatter(0, km_to_b, s=140, c='orange', zorder=5, label='Point B')
+ax_real.plot([0, km_to_a], [0, 0], 'g-', lw=2.5, alpha=0.7)
+ax_real.plot([0, 0], [0, km_to_b], color='orange', lw=2.5, alpha=0.7)
+
+# Dashed ring marks the point the Ball tree selects (A)
+circle_ball = plt.Circle((km_to_a, 0), 6, fill=False, color='black', lw=3, linestyle='--', zorder=15)
+ax_real.add_patch(circle_ball)
+ax_real.annotate('Ball tree picks A\n(smaller in km)', (5, 55), fontsize=12, fontweight='bold')
+
+ax_real.set_xlabel('East-West distance (km)', fontsize=13)
+ax_real.set_ylabel('North-South distance (km)', fontsize=13)
+ax_real.set_title(f'Ball tree: haversine (true distance)\n1° longitude = only {km_per_deg_lon:.0f} km at {lat}°N!',
+                  fontsize=13, fontweight='bold')
+ax_real.legend(loc='upper right', fontsize=10)
+ax_real.set_xlim(-10, 70)
+ax_real.set_ylim(-10, 70)
+ax_real.set_aspect('equal')
+ax_real.grid(True, alpha=0.3)
+ax_real.annotate(f'{km_to_a:.0f} km', (km_to_a/2, -6), ha='center', fontsize=14, color='green', fontweight='bold')
+ax_real.annotate(f'{km_to_b:.0f} km', (-7, km_to_b/2), ha='center', fontsize=14, color='orange', fontweight='bold', rotation=90)
+
+plt.show()
+
+print(f"At {lat}°N: 1° longitude = {km_per_deg_lon:.0f} km, 1° latitude = {km_per_deg_lat} km")
+print(f"\nPoint A: 2° east = {km_to_a:.0f} km (along surface)")
+print(f"Point B: 0.5° north = {km_to_b:.0f} km")
+print(f"\nKD-tree picked: Point {kd_picked} {'✗ WRONG!' if kd_picked == 'B' else '✓'}")
+print(f"Ball tree picked: Point {ball_picked} {'✓ CORRECT!' if ball_picked == 'A' else '✗'}")
+```
+
+## Next steps
+
+Ready to use tree-based indexing with xarray? See [NDPointIndex](https://xarray-indexes.readthedocs.io/blocks/ndpoint.html) for how to integrate KD-trees and Ball trees with xarray's indexing system.
diff --git a/pixi.lock b/pixi.lock index 6f1e3b5b..119d418f 100644 --- a/pixi.lock +++ b/pixi.lock @@ -3,6 +3,8 @@ environments: default: channels: - url: https://conda.anaconda.org/conda-forge/ + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -172,6 +174,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/json-c-0.18-h6688a6e_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.10.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/jsonpointer-3.0.0-py312h7900ff3_1.conda @@ -193,6 +196,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-myst-2.4.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.27.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_vim-4.1.4-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.8-py312h84d6215_0.conda @@ -392,6 +396,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.11-h072c03f_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/s3fs-2025.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.8.0-np2py312h3226591_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/scipy-1.16.0-py312hf734454_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-1.8.3-pyh0d859eb_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.8.0-pyhff2d567_0.conda @@ -405,6 +410,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-7.4.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autobuild-2025.8.25-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-book-theme-1.1.4-pyh29332c3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-codeautolink-0.17.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-comments-0.0.3-pyhd8ed1ab_1.conda @@ -428,10 +434,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlalchemy-2.0.38-py312h66e93f0_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.49.1-h9eae976_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-0.52.1-pyhfdc7a7d_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/svt-av1-2.3.0-h5888daf_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyh0d859eb_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda @@ -452,13 +460,16 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/uriparser-0.9.8-hac33072_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/url-normalize-2.2.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.40.0-pyhc90fa1f_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/virtualenv-20.29.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-1.1.1-py312h0ccc70a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-24.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webob-1.8.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.8.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/websockets-16.0-py312h5253ce2_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/wrapt-1.17.2-py312h66e93f0_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/x265-3.5-h924138e_3.tar.bz2 @@ -653,6 +664,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/json-c-0.18-he4178ee_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.10.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/jsonpointer-3.0.0-py312h81bd7bf_1.conda @@ -674,6 +686,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-myst-2.4.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.27.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_vim-4.1.4-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/kiwisolver-1.4.8-py312h2c4a281_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda @@ -867,6 +880,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/rpds-py-0.22.3-py312hcd83bfe_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/s3fs-2025.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scikit-learn-1.8.0-np2py312he5ca3e3_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scipy-1.16.0-py312hcedbd36_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-1.8.3-pyh31c8845_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.8.0-pyhff2d567_0.conda @@ -880,6 +894,7 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-7.4.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autobuild-2025.8.25-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-book-theme-1.1.4-pyh29332c3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-codeautolink-0.17.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-comments-0.0.3-pyhd8ed1ab_1.conda @@ -903,10 +918,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlalchemy-2.0.38-py312hea69d52_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlite-3.49.1-hd7222ec_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-0.52.1-pyhfdc7a7d_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/svt-av1-2.3.0-hf24288c_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyh31c8845_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda @@ -927,12 +944,15 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uriparser-0.9.8-h00cdb27_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/url-normalize-2.2.1-pyhd8ed1ab_0.conda 
- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.40.0-pyhc90fa1f_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/virtualenv-20.29.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-1.1.1-py312h7a0e18e_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-24.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webob-1.8.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.8.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/websockets-16.0-py312hb3ab3e3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/wrapt-1.17.2-py312hea69d52_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/x265-3.5-hbc6ce65_3.tar.bz2 @@ -1102,6 +1122,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.10.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/jsonpointer-3.0.0-py312h2e8e312_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.23.0-pyhd8ed1ab_1.conda @@ -1122,6 +1143,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-myst-2.4.2-pyhd8ed1ab_1.conda - 
conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.27.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_vim-4.1.4-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.8-py312hc790b64_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda @@ -1300,6 +1322,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/rpds-py-0.22.3-py312h2615798_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/s3fs-2025.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/scikit-learn-1.8.0-np2py312hea30aaf_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/scipy-1.16.0-py312h1416ca1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-1.8.3-pyh5737063_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.8.0-pyhff2d567_0.conda @@ -1313,6 +1336,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.5-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-7.4.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autobuild-2025.8.25-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-book-theme-1.1.4-pyh29332c3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-codeautolink-0.17.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-comments-0.0.3-pyhd8ed1ab_1.conda @@ -1336,11 +1360,13 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/win-64/sqlalchemy-2.0.38-py312h4389bb4_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/sqlite-3.49.1-h2466b09_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-0.52.1-pyhfdc7a7d_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/svt-av1-2.3.0-he0c23c2_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/tbb-2021.13.0-h62715c5_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyh5737063_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.2.1-pyhd8ed1ab_1.conda @@ -1362,15 +1388,18 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/uriparser-0.9.8-h5a68840_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/url-normalize-2.2.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.40.0-pyh6dadd2b_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h5fd82a7_24.conda - conda: https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.44.35208-h818238b_26.conda - conda: https://conda.anaconda.org/conda-forge/noarch/virtualenv-20.29.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.44.35208-h38c0c73_26.conda + - conda: 
https://conda.anaconda.org/conda-forge/win-64/watchfiles-1.1.1-py312hb0142fd_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-24.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/webob-1.8.9-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.8.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/websockets-16.0-py312he5662c2_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.13-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyh7428d3b_8.conda - conda: https://conda.anaconda.org/conda-forge/win-64/winpty-0.4.3-4.tar.bz2 @@ -5190,6 +5219,16 @@ packages: license_family: MIT size: 23708 timestamp: 1733229244590 + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda + sha256: 301539229d7be6420c084490b8145583291123f0ce6b92f56be5948a2c83a379 + md5: 615de2a4d97af50c350e5cf160149e77 + depends: + - python >=3.10 + - setuptools + license: BSD-3-Clause + license_family: BSD + size: 226448 + timestamp: 1765794135253 - conda: https://conda.anaconda.org/conda-forge/linux-64/json-c-0.18-h6688a6e_0.conda sha256: 09e706cb388d3ea977fabcee8e28384bdaad8ce1fc49340df5f868a2bd95a7da md5: 38f5dbc9ac808e31c00650f7be1db93f @@ -5551,6 +5590,16 @@ packages: license_family: BSD size: 49449 timestamp: 1733599666357 + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_vim-4.1.4-pyhd8ed1ab_1.conda + sha256: f2310f01dcdff9310851a72fc25f3d7ace3dcf7b824891ca4edfcf92c8f17759 + md5: ad35af6b5c0ece478f7511d163a9540f + depends: + - jupyterlab >=4.0.0,<5 + - python >=3.9 + license: MIT + license_family: MIT + size: 64333 + timestamp: 1736162666078 - conda: 
https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.13-pyhd8ed1ab_1.conda sha256: 206489e417408d2ffc2a7b245008b4735a8beb59df6c9109d4f77e7bc5969d5d md5: b26e487434032d7f486277beb0cead3a @@ -11221,6 +11270,62 @@ packages: license_family: BSD size: 33659 timestamp: 1748121848057 + - conda: https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.8.0-np2py312h3226591_1.conda + sha256: 23c643c37fafa14ba3f2b7a407126ea5e732a3655ea8157cf9f977098f863448 + md5: 38decbeae260892040709cafc0514162 + depends: + - python + - numpy >=1.24.1 + - scipy >=1.10.0 + - joblib >=1.3.0 + - threadpoolctl >=3.2.0 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - libstdcxx >=14 + - _openmp_mutex >=4.5 + - numpy >=1.23,<3 + - python_abi 3.12.* *_cp312 + license: BSD-3-Clause + license_family: BSD + size: 9726193 + timestamp: 1765801245538 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scikit-learn-1.8.0-np2py312he5ca3e3_1.conda + sha256: 5f640a06e001666f9d4dca7cca992f1753e722e9f6e50899d7d250c02ddf7398 + md5: ed7887c51edfa304c69a424279cec675 + depends: + - python + - numpy >=1.24.1 + - scipy >=1.10.0 + - joblib >=1.3.0 + - threadpoolctl >=3.2.0 + - libcxx >=19 + - python 3.12.* *_cpython + - __osx >=11.0 + - llvm-openmp >=19.1.7 + - numpy >=1.23,<3 + - python_abi 3.12.* *_cp312 + license: BSD-3-Clause + license_family: BSD + size: 9124177 + timestamp: 1766550900752 + - conda: https://conda.anaconda.org/conda-forge/win-64/scikit-learn-1.8.0-np2py312hea30aaf_1.conda + sha256: cc3057fd244a13afe94bdb5e3fb6ecbd7ece78559ebdb55a86ae40202ed813a0 + md5: e5cd920b237e02178573ce47ffa87e8c + depends: + - python + - numpy >=1.24.1 + - scipy >=1.10.0 + - joblib >=1.3.0 + - threadpoolctl >=3.2.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - python_abi 3.12.* *_cp312 + - numpy >=1.23,<3 + license: BSD-3-Clause + license_family: BSD + size: 8884013 + timestamp: 1765801252142 - conda: 
https://conda.anaconda.org/conda-forge/linux-64/scipy-1.16.0-py312hf734454_0.conda sha256: 8406e26bf853e699b1ea97792f63987808783ff4ab6ddeff9cf1ec0b9d1aa342 md5: 7513ac56209d27a85ffa1582033f10a8 @@ -11491,6 +11596,22 @@ packages: license_family: BSD size: 1358660 timestamp: 1721487658869 + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autobuild-2025.8.25-pyhcf101f3_0.conda + sha256: ad56a36c575f4ccec429e070dd36538cb6cb25f8d8b174a94bb9622858d9e4a4 + md5: 26d9d9a48ff32bca94581d7c91684ab8 + depends: + - colorama >=0.4.6 + - python >=3.11 + - sphinx + - starlette >=0.35 + - uvicorn >=0.25 + - watchfiles >=0.20 + - websockets >=11 + - python + license: MIT + license_family: MIT + size: 19892 + timestamp: 1762270046787 - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-book-theme-1.1.4-pyh29332c3_0.conda sha256: 78581f1ba538186fc4129191a8db4ee7798382b6b4a1a0c55dedb437da1a9fd8 md5: f3d3f4e7e2c9198e88cd524633665081 @@ -11797,6 +11918,18 @@ packages: license_family: MIT size: 26988 timestamp: 1733569565672 + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-0.52.1-pyhfdc7a7d_0.conda + sha256: ab0d09eaee2e35a969e7fca3b5b2fdba35c1f2abb8eb8c66245485155d41868e + md5: 7ee23ae71c6c1e2f2fe9ea7cf00f1a8e + depends: + - anyio >=3.6.2,<5 + - python >=3.10 + - typing_extensions >=4.10.0 + - python + license: BSD-3-Clause + license_family: BSD + size: 64896 + timestamp: 1768919444896 - conda: https://conda.anaconda.org/conda-forge/linux-64/svt-av1-2.3.0-h5888daf_0.conda sha256: df30a9be29f1a8b5a2e314dd5b16ccfbcbd1cc6a4f659340e8bc2bd4de37bc6f md5: 355898d24394b2af353eb96358db9fdd @@ -11895,6 +12028,15 @@ packages: license_family: BSD size: 22883 timestamp: 1710262943966 + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda + sha256: 6016672e0e72c4cf23c0cf7b1986283bd86a9c17e8d319212d78d8e9ae42fdfd + md5: 9d64911b31d57ca443e9f1e36b04385f + depends: + - python >=3.9 + license: BSD-3-Clause + license_family: 
BSD + size: 23869 + timestamp: 1741878358548 - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda sha256: cad582d6f978276522f84bd209a5ddac824742fe2d452af6acf900f8650a73a2 md5: f1acf5fdefa8300de697982bcb1761c9 @@ -12249,6 +12391,34 @@ packages: license_family: MIT size: 100102 timestamp: 1734859520452 + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.40.0-pyh6dadd2b_0.conda + sha256: 10a53144032ab6671c9a4f4f801448093cd2c4b73d40d13a00837a18824d602e + md5: 518f06ec23263844563845074bd619f3 + depends: + - __win + - click >=7.0 + - h11 >=0.8 + - python >=3.10 + - typing_extensions >=4.0 + - python + license: BSD-3-Clause + license_family: BSD + size: 53657 + timestamp: 1766332935113 + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.40.0-pyhc90fa1f_0.conda + sha256: 9cb6777bc67d43184807f8c57bdf8c917830240dd95e66fa9dbb7d65fa81f68e + md5: eb8fdfa0a193cfe804970d1a5470246d + depends: + - __unix + - click >=7.0 + - h11 >=0.8 + - python >=3.10 + - typing_extensions >=4.0 + - python + license: BSD-3-Clause + license_family: BSD + size: 54972 + timestamp: 1766332899903 - conda: https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h5fd82a7_24.conda sha256: 7ce178cf139ccea5079f9c353b3d8415d1d49b0a2f774662c355d3f89163d7b4 md5: 00cf3a61562bd53bd5ea99e6888793d0 @@ -12292,6 +12462,50 @@ packages: license_family: BSD size: 17888 timestamp: 1750371463202 + - conda: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-1.1.1-py312h0ccc70a_0.conda + sha256: 5cc839dafe34e5f7b612e1d4d97bb11546eae8b1842e5b7870b3c6adbe9097e8 + md5: d8ecac58c1cb180296a1dd7de058dbc5 + depends: + - __glibc >=2.17,<3.0.a0 + - anyio >=3.0.0 + - libgcc >=14 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + constrains: + - __glibc >=2.17 + license: MIT + license_family: MIT + size: 419919 + timestamp: 1760456820374 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-1.1.1-py312h7a0e18e_0.conda + sha256: 
98c48ebccb9009fb6a77e2d0df834f3ed7f148d4d549d39ea060f467234a70f5 + md5: 4f1ed5d39857625bb1124dbeb1c99840 + depends: + - __osx >=11.0 + - anyio >=3.0.0 + - python >=3.12,<3.13.0a0 + - python >=3.12,<3.13.0a0 *_cpython + - python_abi 3.12.* *_cp312 + constrains: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 364002 + timestamp: 1760457732293 + - conda: https://conda.anaconda.org/conda-forge/win-64/watchfiles-1.1.1-py312hb0142fd_0.conda + sha256: 5333e9a859c2e2c233b3fe9797e644d4b7eb88d2f12be4d9aa313fb491a3684e + md5: ccad8991c8fe2f56362e7294a6a0b131 + depends: + - anyio >=3.0.0 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: MIT + license_family: MIT + size: 303368 + timestamp: 1760457029394 - conda: https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_0.conda sha256: 0884b2023a32d2620192cf2e2fc6784b8d1e31cf9f137e49e00802d4daf7d1c1 md5: 0a732427643ae5e0486a727927791da1 @@ -12351,6 +12565,43 @@ packages: license_family: APACHE size: 46718 timestamp: 1733157432924 + - conda: https://conda.anaconda.org/conda-forge/linux-64/websockets-16.0-py312h5253ce2_1.conda + sha256: dd598cab9175a9ab11c8a1798c49ccabe923263d12aababa84a296cb18206464 + md5: e35ffb48178b20ee1a43fbe7abc93746 + depends: + - python + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - python_abi 3.12.* *_cp312 + license: BSD-3-Clause + license_family: BSD + size: 358659 + timestamp: 1768087389177 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/websockets-16.0-py312hb3ab3e3_1.conda + sha256: 4b15497f3cbc40c6fc9e0f155e9cd31aa13e8d2cb1930355da934af22816a73a + md5: 3da07548ed0e08634abf2b3b878eabc1 + depends: + - python + - python 3.12.* *_cpython + - __osx >=11.0 + - python_abi 3.12.* *_cp312 + license: BSD-3-Clause + license_family: BSD + size: 362390 + timestamp: 1768087403337 + - conda: https://conda.anaconda.org/conda-forge/win-64/websockets-16.0-py312he5662c2_1.conda + sha256: 
fda4ece1e956169d8c7fed231c52c53fbdb2dc36105d6a1a083174dda804ac0a + md5: 65db5c23f67c34d2ecbd6ede2c8b253e + depends: + - python + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - python_abi 3.12.* *_cp312 + license: BSD-3-Clause + license_family: BSD + size: 414775 + timestamp: 1768087427139 - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.13-pyhd8ed1ab_1.conda sha256: a750202ae2a31d8e5ee5a5c127fcc7fa783cd0fbedbc0bf1ab549a109881fa9f md5: 237db148cc37a466e4222d589029b53e diff --git a/pyproject.toml b/pyproject.toml index 2115462c..e45b1274 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ line_length=100 ignore-words-list="nd" skip="pixi.lock,.pixi,qaqc.yaml" -[tool.pixi.project] +[tool.pixi.workspace] name = "xarray-tutorial" channels = ["conda-forge"] platforms = ["osx-arm64", "linux-64", "win-64"] @@ -61,6 +61,7 @@ checklinks = "jupyter-book build ./ --builder linkcheck" clear-cache = "jupyter-book clean ." # To clear a specific notebook pass ID number: jcache cache -p _build/.jupyter_cache remove 28 # To clear all cached notebooks: jupyter-book clean . --all" +watch = { cmd = "jupyter-book config sphinx ./ && sphinx-autobuild ./ _build/html -b html --re-ignore '_build/.*'", description = "Build and serve with live reload" } list-cache = "jcache cache -p _build/.jupyter_cache list" [tool.pixi.dependencies] @@ -90,10 +91,12 @@ pydap = ">=3.5.5,<4" python-graphviz = ">=0.21,<0.22" pooch = ">=1.8.2,<2" rioxarray = ">=0.19.0,<0.20" +scikit-learn = ">=1.6.0,<2" scipy = ">=1.16.0,<2" sphinx-codeautolink = ">=0.17.4,<0.18" sphinxcontrib-mermaid = ">=1.0.0,<2" sphinx-notfound-page = ">=1.1.0,<2" +sphinx-autobuild = ">=2024.10.3,<2026" sphinxext-rediraffe = ">=0.2.7,<0.3" s3fs = ">=2025.5.1,<2026" xarray = ">=2025.7.0,<2026" @@ -101,3 +104,4 @@ zarr = ">=3.0.10,<4" flox = ">=0.10.4,<0.11" numbagg = ">=0.9.0,<0.10" rich = ">=14.0.0,<15" +jupyterlab_vim = ">=4.1.4,<5"