From d7326970b21d97e611f225c8bb629de4b96c419a Mon Sep 17 00:00:00 2001 From: Martin Makaveev Date: Sat, 21 Feb 2026 16:11:34 -0500 Subject: [PATCH 01/12] monitor and track docker health tracks two containers with various variables for each and stores them in influxDB 3 --- installer/data-downloader/backend/services.py | 30 ++- installer/data-downloader/backend/storage.py | 27 ++- installer/docker-compose.yml | 20 ++ installer/health-monitor/Dockerfile | 13 ++ installer/health-monitor/README.md | 46 ++++ installer/health-monitor/monitor.py | 220 ++++++++++++++++++ installer/health-monitor/requirements.txt | 3 + 7 files changed, 356 insertions(+), 3 deletions(-) create mode 100644 installer/health-monitor/Dockerfile create mode 100644 installer/health-monitor/README.md create mode 100644 installer/health-monitor/monitor.py create mode 100644 installer/health-monitor/requirements.txt diff --git a/installer/data-downloader/backend/services.py b/installer/data-downloader/backend/services.py index ab485cc..06c7ae2 100644 --- a/installer/data-downloader/backend/services.py +++ b/installer/data-downloader/backend/services.py @@ -141,8 +141,34 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: else: self.status_repo.mark_finish(success=True) - return results - + sensors = fetch_unique_sensors( + SensorQueryConfig( + host=self.settings.influx_host, + token=self.settings.influx_token, + database=self.settings.influx_database, + schema=self.settings.influx_schema, + table=self.settings.influx_table, + window_days=self.settings.sensor_window_days, + lookback_days=self.settings.sensor_lookback_days, + fallback_start=fallback_start, + fallback_end=fallback_end, + ) + ) + sensors_payload = self.sensors_repo.write_sensors(sensors) + + runs_list = runs_payload.get("runs", []) + sensors_list = sensors_payload.get("sensors", []) + self.status_repo.mark_finish( + success=True, + runs_count=len(runs_list), + sensors_count=len(sensors_list), + interval_seconds=self.settings.periodic_interval_seconds, + ) + + return { + "runs": runs_payload, + "sensors": sensors_payload, + } except Exception as exc: self.status_repo.mark_finish(success=False, error=str(exc)) raise diff --git a/installer/data-downloader/backend/storage.py b/installer/data-downloader/backend/storage.py index 1a103c9..97d2e97 100644 --- a/installer/data-downloader/backend/storage.py +++ b/installer/data-downloader/backend/storage.py @@ -158,6 +158,12 @@ def __init__(self, data_dir: Path): "source": None, "last_result": None, "error": None, + "last_successful_job_timestamp": None, + "error_count": 0, + "last_scan_runs_count": None, + "last_scan_sensors_count": None, + "scan_interval_seconds": None, + "events_processed_per_minute": None, } self.store = JSONStore(data_dir / "scanner_status.json", default) @@ -178,7 +184,14 @@ def mark_start(self, source: str) -> dict: self.store.write(payload) return payload - def mark_finish(self, success: bool, error: str | None = None) -> dict: + def mark_finish( + self, + success: bool, + error: str | None = None, + runs_count: int | None = None, + sensors_count: int | None = None, + interval_seconds: int | None = None, + ) -> dict: payload = self.store.read() payload.update( { @@ -189,8 +202,20 @@ def mark_finish(self, success: bool, error: str | None = None) -> dict: ) if success: payload.pop("error", None) + payload["last_successful_job_timestamp"] = now_iso() + if runs_count is not None: + payload["last_scan_runs_count"] = runs_count + if sensors_count is not None: + payload["last_scan_sensors_count"] = sensors_count + if interval_seconds is not None: + payload["scan_interval_seconds"] = interval_seconds + if runs_count is not None and interval_seconds > 0: + payload["events_processed_per_minute"] = round( + (runs_count * 60.0) / interval_seconds, 2 + ) else: payload["error"] = error or "scan failed" + payload["error_count"] = payload.get("error_count", 0) + 1 payload["updated_at"] = now_iso() self.store.write(payload) return payload diff --git a/installer/docker-compose.yml b/installer/docker-compose.yml index a58f082..8878c15 100644 --- a/installer/docker-compose.yml +++ b/installer/docker-compose.yml @@ -229,6 +229,26 @@ services: data-downloader-api: condition: service_started + health-monitor: + build: ./health-monitor + container_name: health-monitor + restart: unless-stopped + environment: + HEALTH_MONITOR_INTERVAL_SECONDS: "${HEALTH_MONITOR_INTERVAL_SECONDS:-60}" + INFLUXDB_URL: "${INFLUXDB_URL:-http://influxdb3:8181}" + INFLUXDB_ADMIN_TOKEN: "${INFLUXDB_ADMIN_TOKEN:-apiv3_dev-influxdb-admin-token}" + INFLUXDB_HEALTH_DATABASE: "${INFLUXDB_HEALTH_DATABASE:-health}" + HEALTH_MONITOR_INFLUXDB_CONTAINER: "${HEALTH_MONITOR_INFLUXDB_CONTAINER:-influxdb3}" + HEALTH_MONITOR_SCANNER_CONTAINER: "${HEALTH_MONITOR_SCANNER_CONTAINER:-data-downloader-scanner}" + HEALTH_MONITOR_SCANNER_API_URL: "${HEALTH_MONITOR_SCANNER_API_URL:-http://data-downloader-api:8000}" + volumes: + - /var/run/docker.sock:/var/run/docker.sock + networks: + - datalink + depends_on: + influxdb3: + condition: service_healthy + data-downloader-frontend: build: context: ./data-downloader diff --git a/installer/health-monitor/Dockerfile b/installer/health-monitor/Dockerfile new file mode 100644 index 0000000..1744030 --- /dev/null +++ b/installer/health-monitor/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY monitor.py . + +CMD ["python", "-u", "monitor.py"] diff --git a/installer/health-monitor/README.md b/installer/health-monitor/README.md new file mode 100644 index 0000000..319abbe --- /dev/null +++ b/installer/health-monitor/README.md @@ -0,0 +1,46 @@ +# Health Monitor + +Python service that periodically collects Docker container and application metrics and writes them to an InfluxDB 3 database. + +## What it does + +- **Every 60 seconds** (configurable): + - **InfluxDB container** (`influxdb3`): Up/Down, restart count, disk usage of the data volume, write latency (and write errors if any). + - **Scanner container** (`data-downloader-scanner`): Up/Down, and application metrics from the data-downloader API: `events_processed_per_minute`, `last_successful_job_timestamp`, `error_count`. + +- Writes all metrics to InfluxDB 3 as points in the **`container_health`** measurement (tag: `container`), in the database configured by `INFLUXDB_HEALTH_DATABASE` (default: `health`). + +## Requirements + +- Docker socket access so the monitor can inspect containers and volume usage. +- Network access to `influxdb3` and `data-downloader-api` (same `datalink` network in docker-compose). +- InfluxDB 3 database: the target database (e.g. `health`) may need to be created in InfluxDB 3 before the first write, depending on your InfluxDB 3 setup. + +## Environment variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `HEALTH_MONITOR_INTERVAL_SECONDS` | `60` | Seconds between collection cycles. | +| `INFLUXDB_URL` | `http://influxdb3:8181` | InfluxDB 3 URL. | +| `INFLUXDB_ADMIN_TOKEN` | (from env) | Token for writing to InfluxDB 3. | +| `INFLUXDB_HEALTH_DATABASE` | `health` | Database (bucket) name for health metrics. | +| `HEALTH_MONITOR_INFLUXDB_CONTAINER` | `influxdb3` | Container name for InfluxDB. | +| `HEALTH_MONITOR_SCANNER_CONTAINER` | `data-downloader-scanner` | Container name for the scanner. | +| `HEALTH_MONITOR_SCANNER_API_URL` | `http://data-downloader-api:8000` | Base URL of the data-downloader API (for scanner metrics). | +| `HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX` | `influxdb3-data` | Volume name suffix used to find InfluxDB data volume for disk usage. | + +## Running + +The service is defined in the main installer `docker-compose.yml` as `health-monitor`. Start the stack (including `influxdb3` and `data-downloader-api` / `data-downloader-scanner`) and the monitor will run automatically. + +```bash +docker compose up -d +# or +docker compose up -d influxdb3 data-downloader-api data-downloader-scanner health-monitor +``` + +Logs: + +```bash +docker compose logs -f health-monitor +``` diff --git a/installer/health-monitor/monitor.py b/installer/health-monitor/monitor.py new file mode 100644 index 0000000..865924e --- /dev/null +++ b/installer/health-monitor/monitor.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +""" +Health monitor: collects Docker container and application metrics, +writes them to an InfluxDB 3 bucket every 60 seconds. +""" + +from __future__ import annotations + +import os +import sys +import time +import logging +from datetime import datetime, timezone + +import docker +import requests +from influxdb_client_3 import InfluxDBClient3, Point + +# Config from environment +INTERVAL_SECONDS = int(os.getenv("HEALTH_MONITOR_INTERVAL_SECONDS", "60")) +INFLUXDB_URL = os.getenv("INFLUXDB_URL", "http://influxdb3:8181") +INFLUXDB_TOKEN = os.getenv("INFLUXDB_ADMIN_TOKEN", os.getenv("INFLUXDB_TOKEN", "")) +INFLUXDB_DATABASE = os.getenv("INFLUXDB_HEALTH_DATABASE", "health") +CONTAINER_INFLUXDB = os.getenv("HEALTH_MONITOR_INFLUXDB_CONTAINER", "influxdb3") +CONTAINER_SCANNER = os.getenv("HEALTH_MONITOR_SCANNER_CONTAINER", "data-downloader-scanner") +SCANNER_API_URL = os.getenv( + "HEALTH_MONITOR_SCANNER_API_URL", + "http://data-downloader-api:8000", +) +INFLUXDB_VOLUME_NAME_SUFFIX = os.getenv("HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX", "influxdb3-data") + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + stream=sys.stdout, +) +logger = logging.getLogger(__name__) + + +def _now_ns() -> int: + return int(datetime.now(timezone.utc).timestamp() * 1_000_000_000) + + +def _influx_client_kwargs() -> dict: + """Connection kwargs for InfluxDBClient3 (host may be URL or host:port).""" + url = (INFLUXDB_URL or "").strip().rstrip("/") + return { + "host": url, + "token": INFLUXDB_TOKEN, + "database": INFLUXDB_DATABASE, + "org": "", + } + + +def collect_influxdb_metrics(client: docker.DockerClient) -> dict: + """Collect metrics for the InfluxDB container: status, restart count, disk usage, write latency.""" + out = { + "up": False, + "restart_count": None, + "disk_usage_bytes": None, + "write_latency_seconds": None, + "write_error": None, + } + try: + container = client.containers.get(CONTAINER_INFLUXDB) + out["up"] = container.attrs["State"]["Running"] + out["restart_count"] = container.attrs.get("RestartCount", 0) + except docker.errors.NotFound: + logger.warning("Container %s not found", CONTAINER_INFLUXDB) + return out + except Exception as e: + logger.exception("Error inspecting %s: %s", CONTAINER_INFLUXDB, e) + out["write_error"] = str(e) + return out + + # Disk usage: find volume matching suffix in Docker system df + try: + df = client.api.df() + for vol in df.get("Volumes") or []: + name = vol.get("Name") or "" + if INFLUXDB_VOLUME_NAME_SUFFIX in name or name.endswith("_" + INFLUXDB_VOLUME_NAME_SUFFIX): + usage = (vol.get("UsageData") or {}).get("Size") + if usage is not None: + out["disk_usage_bytes"] = usage + break + except Exception as e: + logger.debug("Could not get volume disk usage: %s", e) + + # Write latency: time a single point write + if out["up"] and INFLUXDB_TOKEN: + try: + start = time.perf_counter() + with InfluxDBClient3(**(_influx_client_kwargs())) as influx: + ping = Point("health_ping").field("check", 1).time(_now_ns(), write_precision="ns") + influx.write(ping) + out["write_latency_seconds"] = round(time.perf_counter() - start, 4) + except Exception as e: + out["write_error"] = str(e)[:500] + logger.debug("InfluxDB latency check failed: %s", e) + + return out + + +def collect_scanner_metrics(client: docker.DockerClient) -> dict: + """Collect metrics for the scanner container: status and app metrics from API.""" + out = { + "up": False, + "events_processed_per_minute": None, + "last_successful_job_timestamp": None, + "error_count": None, + "api_error": None, + } + try: + container = client.containers.get(CONTAINER_SCANNER) + out["up"] = container.attrs["State"]["Running"] + except docker.errors.NotFound: + logger.warning("Container %s not found", CONTAINER_SCANNER) + return out + except Exception as e: + logger.exception("Error inspecting %s: %s", CONTAINER_SCANNER, e) + out["api_error"] = str(e) + return out + + # Application metrics from data-downloader API (reads shared scanner_status.json) + try: + r = requests.get( + f"{SCANNER_API_URL.rstrip('/')}/api/scanner-status", + timeout=10, + ) + r.raise_for_status() + data = r.json() + out["events_processed_per_minute"] = data.get("events_processed_per_minute") + out["last_successful_job_timestamp"] = data.get("last_successful_job_timestamp") + out["error_count"] = data.get("error_count") + except requests.RequestException as e: + out["api_error"] = str(e)[:500] + logger.debug("Scanner API request failed: %s", e) + except (ValueError, KeyError) as e: + out["api_error"] = str(e)[:500] + + return out + + +def write_health_to_influx(influx_metrics: dict, scanner_metrics: dict) -> None: + """Write collected metrics to InfluxDB 3 as points.""" + if not INFLUXDB_TOKEN: + logger.warning("INFLUXDB_ADMIN_TOKEN/INFLUXDB_TOKEN not set; skipping write") + return + try: + with InfluxDBClient3(**_influx_client_kwargs()) as client: + ts_ns = _now_ns() + + # Container: influxdb + p_influx = ( + Point("container_health") + .tag("container", CONTAINER_INFLUXDB) + .field("up", influx_metrics["up"]) + .time(ts_ns, write_precision="ns") + ) + if influx_metrics["restart_count"] is not None: + p_influx = p_influx.field("restart_count", influx_metrics["restart_count"]) + if influx_metrics["disk_usage_bytes"] is not None: + p_influx = p_influx.field("disk_usage_bytes", influx_metrics["disk_usage_bytes"]) + if influx_metrics["write_latency_seconds"] is not None: + p_influx = p_influx.field( + "write_latency_seconds", influx_metrics["write_latency_seconds"] + ) + if influx_metrics.get("write_error"): + p_influx = p_influx.field("write_error", influx_metrics["write_error"]) + client.write(p_influx) + + # Container: scanner + p_scanner = ( + Point("container_health") + .tag("container", CONTAINER_SCANNER) + .field("up", scanner_metrics["up"]) + .time(ts_ns, write_precision="ns") + ) + if scanner_metrics.get("events_processed_per_minute") is not None: + p_scanner = p_scanner.field( + "events_processed_per_minute", + scanner_metrics["events_processed_per_minute"], + ) + if scanner_metrics.get("last_successful_job_timestamp"): + p_scanner = p_scanner.field( + "last_successful_job_timestamp", + scanner_metrics["last_successful_job_timestamp"], + ) + if scanner_metrics.get("error_count") is not None: + p_scanner = p_scanner.field("error_count", scanner_metrics["error_count"]) + if scanner_metrics.get("api_error"): + p_scanner = p_scanner.field("api_error", scanner_metrics["api_error"]) + client.write(p_scanner) + + logger.info("Wrote health points for %s and %s", CONTAINER_INFLUXDB, CONTAINER_SCANNER) + except Exception as e: + logger.exception("Failed to write health to InfluxDB: %s", e) + + +def main() -> None: + logger.info( + "Health monitor started (interval=%ss, influx=%s, database=%s)", + INTERVAL_SECONDS, + INFLUXDB_URL, + INFLUXDB_DATABASE, + ) + docker_client = docker.from_env() + + while True: + try: + influx_metrics = collect_influxdb_metrics(docker_client) + scanner_metrics = collect_scanner_metrics(docker_client) + write_health_to_influx(influx_metrics, scanner_metrics) + except Exception: + logger.exception("Health collection cycle failed") + time.sleep(INTERVAL_SECONDS) + + +if __name__ == "__main__": + main() diff --git a/installer/health-monitor/requirements.txt b/installer/health-monitor/requirements.txt new file mode 100644 index 0000000..69100c8 --- /dev/null +++ b/installer/health-monitor/requirements.txt @@ -0,0 +1,3 @@ +docker>=7.0.0 +influxdb3-python>=0.16.0 +requests>=2.28.0 From 88525ce51c998607bf0080c9b6820ca63ffc56b5 Mon Sep 17 00:00:00 2001 From: Martin Makaveev Date: Sat, 21 Feb 2026 16:19:29 -0500 Subject: [PATCH 02/12] fixed naming convention --- installer/docker-compose.yml | 2 +- installer/health-monitor/README.md | 8 +++--- installer/health-monitor/monitor.py | 40 ++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/installer/docker-compose.yml b/installer/docker-compose.yml index 8878c15..8e506a1 100644 --- a/installer/docker-compose.yml +++ b/installer/docker-compose.yml @@ -237,7 +237,7 @@ services: HEALTH_MONITOR_INTERVAL_SECONDS: "${HEALTH_MONITOR_INTERVAL_SECONDS:-60}" INFLUXDB_URL: "${INFLUXDB_URL:-http://influxdb3:8181}" INFLUXDB_ADMIN_TOKEN: "${INFLUXDB_ADMIN_TOKEN:-apiv3_dev-influxdb-admin-token}" - INFLUXDB_HEALTH_DATABASE: "${INFLUXDB_HEALTH_DATABASE:-health}" + INFLUXDB_HEALTH_DATABASE: "${INFLUXDB_HEALTH_DATABASE:-monitoring}" HEALTH_MONITOR_INFLUXDB_CONTAINER: "${HEALTH_MONITOR_INFLUXDB_CONTAINER:-influxdb3}" HEALTH_MONITOR_SCANNER_CONTAINER: "${HEALTH_MONITOR_SCANNER_CONTAINER:-data-downloader-scanner}" HEALTH_MONITOR_SCANNER_API_URL: "${HEALTH_MONITOR_SCANNER_API_URL:-http://data-downloader-api:8000}" diff --git a/installer/health-monitor/README.md b/installer/health-monitor/README.md index 319abbe..33464f0 100644 --- a/installer/health-monitor/README.md +++ b/installer/health-monitor/README.md @@ -8,13 +8,15 @@ Python service that periodically collects Docker container and application metri - **InfluxDB container** (`influxdb3`): Up/Down, restart count, disk usage of the data volume, write latency (and write errors if any). - **Scanner container** (`data-downloader-scanner`): Up/Down, and application metrics from the data-downloader API: `events_processed_per_minute`, `last_successful_job_timestamp`, `error_count`. -- Writes all metrics to InfluxDB 3 as points in the **`container_health`** measurement (tag: `container`), in the database configured by `INFLUXDB_HEALTH_DATABASE` (default: `health`). +- Writes all metrics to InfluxDB 3 in the **`monitoring`** database (configurable via `INFLUXDB_HEALTH_DATABASE`): + - **`monitor.container`** — Docker-level metrics (up, restart_count, disk_usage, write_latency) with tag `container` + - **`monitor.service`** — Application-level metrics (events_processed_per_minute, last_successful_job_timestamp, error_count) with tag `service` ## Requirements - Docker socket access so the monitor can inspect containers and volume usage. - Network access to `influxdb3` and `data-downloader-api` (same `datalink` network in docker-compose). -- InfluxDB 3 database: the target database (e.g. `health`) may need to be created in InfluxDB 3 before the first write, depending on your InfluxDB 3 setup. +- InfluxDB 3 database: the target database (e.g. `monitoring`) may need to be created in InfluxDB 3 before the first write, depending on your InfluxDB 3 setup. ## Environment variables @@ -23,7 +25,7 @@ Python service that periodically collects Docker container and application metri | `HEALTH_MONITOR_INTERVAL_SECONDS` | `60` | Seconds between collection cycles. | | `INFLUXDB_URL` | `http://influxdb3:8181` | InfluxDB 3 URL. | | `INFLUXDB_ADMIN_TOKEN` | (from env) | Token for writing to InfluxDB 3. | -| `INFLUXDB_HEALTH_DATABASE` | `health` | Database (bucket) name for health metrics. | +| `INFLUXDB_HEALTH_DATABASE` | `monitoring` | Database (bucket) name for monitoring metrics. | | `HEALTH_MONITOR_INFLUXDB_CONTAINER` | `influxdb3` | Container name for InfluxDB. | | `HEALTH_MONITOR_SCANNER_CONTAINER` | `data-downloader-scanner` | Container name for the scanner. | | `HEALTH_MONITOR_SCANNER_API_URL` | `http://data-downloader-api:8000` | Base URL of the data-downloader API (for scanner metrics). | diff --git a/installer/health-monitor/monitor.py b/installer/health-monitor/monitor.py index 865924e..481c4e9 100644 --- a/installer/health-monitor/monitor.py +++ b/installer/health-monitor/monitor.py @@ -20,7 +20,7 @@ INTERVAL_SECONDS = int(os.getenv("HEALTH_MONITOR_INTERVAL_SECONDS", "60")) INFLUXDB_URL = os.getenv("INFLUXDB_URL", "http://influxdb3:8181") INFLUXDB_TOKEN = os.getenv("INFLUXDB_ADMIN_TOKEN", os.getenv("INFLUXDB_TOKEN", "")) -INFLUXDB_DATABASE = os.getenv("INFLUXDB_HEALTH_DATABASE", "health") +INFLUXDB_DATABASE = os.getenv("INFLUXDB_HEALTH_DATABASE", "monitoring") CONTAINER_INFLUXDB = os.getenv("HEALTH_MONITOR_INFLUXDB_CONTAINER", "influxdb3") CONTAINER_SCANNER = os.getenv("HEALTH_MONITOR_SCANNER_CONTAINER", "data-downloader-scanner") SCANNER_API_URL = os.getenv( @@ -91,7 +91,7 @@ def collect_influxdb_metrics(client: docker.DockerClient) -> dict: try: start = time.perf_counter() with InfluxDBClient3(**(_influx_client_kwargs())) as influx: - ping = Point("health_ping").field("check", 1).time(_now_ns(), write_precision="ns") + ping = Point("monitor.ping").field("check", 1).time(_now_ns(), write_precision="ns") influx.write(ping) out["write_latency_seconds"] = round(time.perf_counter() - start, 4) except Exception as e: @@ -150,9 +150,9 @@ def write_health_to_influx(influx_metrics: dict, scanner_metrics: dict) -> None: with InfluxDBClient3(**_influx_client_kwargs()) as client: ts_ns = _now_ns() - # Container: influxdb + # monitor.container.* — Docker/container-level metrics p_influx = ( - Point("container_health") + Point("monitor.container") .tag("container", CONTAINER_INFLUXDB) .field("up", influx_metrics["up"]) .time(ts_ns, write_precision="ns") @@ -169,28 +169,44 @@ def write_health_to_influx(influx_metrics: dict, scanner_metrics: dict) -> None: p_influx = p_influx.field("write_error", influx_metrics["write_error"]) client.write(p_influx) - # Container: scanner - p_scanner = ( - Point("container_health") + p_scanner_container = ( + Point("monitor.container") .tag("container", CONTAINER_SCANNER) .field("up", scanner_metrics["up"]) .time(ts_ns, write_precision="ns") ) + if scanner_metrics.get("api_error"): + p_scanner_container = p_scanner_container.field( + "api_error", scanner_metrics["api_error"] + ) + client.write(p_scanner_container) + + # monitor.service.* — Application/service-level metrics (scanner) + p_scanner_service = ( + Point("monitor.service") + .tag("service", CONTAINER_SCANNER) + .field("up", scanner_metrics["up"]) + .time(ts_ns, write_precision="ns") + ) if scanner_metrics.get("events_processed_per_minute") is not None: - p_scanner = p_scanner.field( + p_scanner_service = p_scanner_service.field( "events_processed_per_minute", scanner_metrics["events_processed_per_minute"], ) if scanner_metrics.get("last_successful_job_timestamp"): - p_scanner = p_scanner.field( + p_scanner_service = p_scanner_service.field( "last_successful_job_timestamp", scanner_metrics["last_successful_job_timestamp"], ) if scanner_metrics.get("error_count") is not None: - p_scanner = p_scanner.field("error_count", scanner_metrics["error_count"]) + p_scanner_service = p_scanner_service.field( + "error_count", scanner_metrics["error_count"] + ) if scanner_metrics.get("api_error"): - p_scanner = p_scanner.field("api_error", scanner_metrics["api_error"]) - client.write(p_scanner) + p_scanner_service = p_scanner_service.field( + "api_error", scanner_metrics["api_error"] + ) + client.write(p_scanner_service) logger.info("Wrote health points for %s and %s", CONTAINER_INFLUXDB, CONTAINER_SCANNER) except Exception as e: From 931567a403b26ba6ded286aaca26195fee4b3d07 Mon Sep 17 00:00:00 2001 From: Martin Makaveev Date: Tue, 10 Mar 2026 21:21:42 -0400 Subject: [PATCH 03/12] modified READme, easier to understand --- installer/health-monitor/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/installer/health-monitor/README.md b/installer/health-monitor/README.md index 33464f0..ad3f9d1 100644 --- a/installer/health-monitor/README.md +++ b/installer/health-monitor/README.md @@ -20,16 +20,16 @@ Python service that periodically collects Docker container and application metri ## Environment variables -| Variable | Default | Description | -|----------|---------|-------------| -| `HEALTH_MONITOR_INTERVAL_SECONDS` | `60` | Seconds between collection cycles. | -| `INFLUXDB_URL` | `http://influxdb3:8181` | InfluxDB 3 URL. | -| `INFLUXDB_ADMIN_TOKEN` | (from env) | Token for writing to InfluxDB 3. | -| `INFLUXDB_HEALTH_DATABASE` | `monitoring` | Database (bucket) name for monitoring metrics. | -| `HEALTH_MONITOR_INFLUXDB_CONTAINER` | `influxdb3` | Container name for InfluxDB. | -| `HEALTH_MONITOR_SCANNER_CONTAINER` | `data-downloader-scanner` | Container name for the scanner. | -| `HEALTH_MONITOR_SCANNER_API_URL` | `http://data-downloader-api:8000` | Base URL of the data-downloader API (for scanner metrics). | -| `HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX` | `influxdb3-data` | Volume name suffix used to find InfluxDB data volume for disk usage. | +| Variable | Default | Description | +|-----------------------------------------|-----------------------------------|----------------------------------------------------------------------| +| `HEALTH_MONITOR_INTERVAL_SECONDS` | `60` | Seconds between collection cycles. | +| `INFLUXDB_URL` | `http://influxdb3:8181` | InfluxDB 3 URL. | +| `INFLUXDB_ADMIN_TOKEN` | (from env) | Token for writing to InfluxDB 3. | +| `INFLUXDB_HEALTH_DATABASE` | `monitoring` | Database (bucket) name for monitoring metrics. | +| `HEALTH_MONITOR_INFLUXDB_CONTAINER` | `influxdb3` | Container name for InfluxDB. | +| `HEALTH_MONITOR_SCANNER_CONTAINER` | `data-downloader-scanner` | Container name for the scanner. | +| `HEALTH_MONITOR_SCANNER_API_URL` | `http://data-downloader-api:8000` | Base URL of the data-downloader API (for scanner metrics). | +| `HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX` | `influxdb3-data` | Volume name suffix used to find InfluxDB data volume for disk usage. | ## Running From 91307722af8a8e6ec355a410951e85be2b98207c Mon Sep 17 00:00:00 2001 From: Martin Makaveev Date: Wed, 18 Mar 2026 21:47:26 -0400 Subject: [PATCH 04/12] exposed an http endpoint for daq website fetch request --- installer/data-downloader/backend/app.py | 82 ++++++++++++++++++- .../data-downloader/backend/requirements.txt | 1 + installer/docker-compose.yml | 1 + 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/installer/data-downloader/backend/app.py b/installer/data-downloader/backend/app.py index b4b4dea..7bbf0f5 100644 --- a/installer/data-downloader/backend/app.py +++ b/installer/data-downloader/backend/app.py @@ -1,6 +1,12 @@ from __future__ import annotations -from datetime import datetime +from datetime import datetime, timezone +import os +import logging +from typing import List + +import docker +from influxdb_client_3 import InfluxDBClient3, Point from fastapi import BackgroundTasks, FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware @@ -25,6 +31,7 @@ class DataQueryPayload(BaseModel): settings = get_settings() service = DataDownloaderService(settings) +logger = logging.getLogger(__name__) app = FastAPI(title="DAQ Data Downloader API") app.add_middleware( @@ -41,6 +48,79 @@ def healthcheck() -> dict: return {"status": "ok"} +def _now_ns() -> int: + """Current unix time in nanoseconds (Influx write_precision='ns').""" + return int(datetime.now(timezone.utc).timestamp() * 1_000_000_000) + + +def _docker_container_running(container_name: str) -> bool: + """Return True if Docker container is in Running state.""" + try: + docker_client = docker.from_env() + container = docker_client.containers.get(container_name) + return bool(container.attrs.get("State", {}).get("Running", False)) + except docker.errors.NotFound: + return False + except Exception as e: + raise RuntimeError(f"Docker inspection failed for {container_name}: {e}") from e + + +def _best_effort_write_health_to_influx(up_by_tag: dict[str, bool]) -> None: + """ + Best-effort write of monitor.container.{up} points into InfluxDB. + Endpoint responses must not fail if InfluxDB is down. + """ + influx_url = (os.getenv("INFLUXDB_URL") or os.getenv("INFLUX_URL") or "").strip().rstrip("/") + token = ( + os.getenv("INFLUXDB_TOKEN") + or os.getenv("INFLUXDB_ADMIN_TOKEN") + or os.getenv("INFLUX_TOKEN") + or os.getenv("INFLUX_ADMIN_TOKEN") + or "" + ) + database = os.getenv("INFLUXDB_HEALTH_DATABASE", "monitoring") + + if not influx_url or not token: + return + + try: + ts_ns = _now_ns() + with InfluxDBClient3(host=influx_url, token=token, database=database) as client: + for container_tag, is_up in up_by_tag.items(): + p = ( + Point("monitor.container") + .tag("container", container_tag) + .field("up", bool(is_up)) + .time(ts_ns, write_precision="ns") + ) + client.write(p) + except Exception: + # Intentionally ignore all write failures; health endpoint should still answer. + logger.debug("InfluxDB write failed during health-status request", exc_info=True) + + +@app.get("/api/health-status") +def health_status(background_tasks: BackgroundTasks) -> dict: + """Container health derived from live Docker inspection (with best-effort Influx writes).""" + try: + up_by_tag = { + "influxdb3": _docker_container_running("influxdb3"), + "data-downloader-scanner": _docker_container_running("data-downloader-scanner"), + } + background_tasks.add_task(_best_effort_write_health_to_influx, up_by_tag) + + now = datetime.now(timezone.utc).isoformat() + return { + "influxdb3": bool(up_by_tag["influxdb3"]), + "scanner": bool(up_by_tag["data-downloader-scanner"]), + "last_updated": now, + } + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=503, detail=str(e)) + + @app.get("/api/seasons") def list_seasons() -> List[dict]: return service.get_seasons() diff --git a/installer/data-downloader/backend/requirements.txt b/installer/data-downloader/backend/requirements.txt index 6f2deab..b56694b 100644 --- a/installer/data-downloader/backend/requirements.txt +++ b/installer/data-downloader/backend/requirements.txt @@ -3,3 +3,4 @@ uvicorn[standard]==0.23.2 influxdb3-python==0.16.0 pydantic==2.9.2 slicks>=0.1.5 +docker>=7.0.0 diff --git a/installer/docker-compose.yml b/installer/docker-compose.yml index 8e506a1..f51d70e 100644 --- a/installer/docker-compose.yml +++ b/installer/docker-compose.yml @@ -198,6 +198,7 @@ services: - "8000:8000" volumes: - ./data-downloader/data:/app/data + - /var/run/docker.sock:/var/run/docker.sock restart: unless-stopped networks: - datalink From aca4e1daef768f2ddb23fe0a1a54c2e3bde0dcec Mon Sep 17 00:00:00 2001 From: Martino <162070213+shark1Martin@users.noreply.github.com> Date: Thu, 19 Mar 2026 22:19:28 -0400 Subject: [PATCH 05/12] updated allowed origins --- installer/data-downloader/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/data-downloader/.env.example b/installer/data-downloader/.env.example index 5345fa3..26c5173 100644 --- a/installer/data-downloader/.env.example +++ b/installer/data-downloader/.env.example @@ -12,4 +12,4 @@ SENSOR_WINDOW_DAYS=7 SENSOR_LOOKBACK_DAYS=30 SCAN_INTERVAL_SECONDS=3600 VITE_API_BASE_URL=http://localhost:8000 -ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173 +ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173,https://daq.westernformularacing.org From 44eead374e8e15e9fa45896608734d3d719b9cb5 Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 03:47:56 +0000 Subject: [PATCH 06/12] Update .gitignore --- .gitignore | 3 + installer/grafana-bridge/package-lock.json | 759 +++++++++++++++++++++ 2 files changed, 762 insertions(+) create mode 100644 installer/grafana-bridge/package-lock.json diff --git a/.gitignore b/.gitignore index a2d0baf..56a9aad 100644 --- a/.gitignore +++ b/.gitignore @@ -219,3 +219,6 @@ installer/slackbot/*.jpeg # Generated CSV data files generated-days/ + +# Node dependencies +node_modules/ diff --git a/installer/grafana-bridge/package-lock.json b/installer/grafana-bridge/package-lock.json new file mode 100644 index 0000000..2a33935 --- /dev/null +++ b/installer/grafana-bridge/package-lock.json @@ -0,0 +1,759 @@ +{ + "name": "pecan-grafana-bridge", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "pecan-grafana-bridge", + "version": "1.0.0", + "dependencies": { + "express": "^4.21.0" + } + }, + "node_modules/accepts": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "dependencies": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==" + }, + "node_modules/body-parser": { + "version": "1.20.4", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz", + "integrity": "sha512-ZTgYYLMOXY9qKU/57FAo8F+HA2dGX7bqGc71txDRC1rS4frdFI5R7NhluHxH6M0YItAP0sHB4uqAOcYKxO6uGA==", + "dependencies": { + "bytes": "~3.1.2", + "content-type": "~1.0.5", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "~1.2.0", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "on-finished": "~2.4.1", + "qs": "~6.14.0", + "raw-body": "~2.5.3", + "type-is": "~1.6.18", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.7.tgz", + "integrity": "sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==" + }, + "node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express": { + "version": "4.22.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz", + "integrity": "sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==", + "dependencies": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "~1.20.3", + "content-disposition": "~0.5.4", + "content-type": "~1.0.4", + "cookie": "~0.7.1", + "cookie-signature": "~1.0.6", + "debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.3.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.0", + "merge-descriptors": "1.0.3", + "methods": "~1.1.2", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "~0.1.12", + "proxy-addr": "~2.0.7", + "qs": "~6.14.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "~0.19.0", + "serve-static": "~1.16.2", + "setprototypeof": "1.2.0", + "statuses": "~2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/finalhandler": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.2.tgz", + "integrity": "sha512-aA4RyPcd3badbdABGDuTXCMTtOneUCAYH/gxoYRTZlIJdF0YPWuGqiAsIrhNnnqdXGswYk6dGujem4w80UJFhg==", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "statuses": "~2.0.2", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" + }, + "node_modules/negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-to-regexp": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==" + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/qs": { + "version": "6.14.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.2.tgz", + "integrity": "sha512-V/yCWTTF7VJ9hIh18Ugr2zhJMP01MY7c5kh4J870L7imm6/DIzBsNLTXzMwUA3yZ5b/KBqLx8Kp3uRvd7xSe3Q==", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.3.tgz", + "integrity": "sha512-s4VSOf6yN0rvbRZGxs8Om5CWj6seneMwK3oDb4lWDH0UPhWcxwOWw5+qk24bxq87szX1ydrwylIOp2uG1ojUpA==", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/send": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.2.tgz", + "integrity": "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==", + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.1", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "~2.4.1", + "range-parser": "~1.2.1", + "statuses": "~2.0.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/serve-static": { + "version": "1.16.3", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.3.tgz", + "integrity": "sha512-x0RTqQel6g5SY7Lg6ZreMmsOzncHFU7nhnRWkKgWuMTu5NN0DR5oruckMqRvacAN9d5w6ARnRBXl9xhDCgfMeA==", + "dependencies": { + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "~0.19.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "engines": { + "node": ">= 0.8" + } + } + } +} From 0dbbab84b3224697b8f972bd040197e77dbd13b2 Mon Sep 17 00:00:00 2001 From: DAQ Server Date: Fri, 20 Mar 2026 03:49:58 +0000 Subject: [PATCH 07/12] Edit .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 56a9aad..558d34e 100644 --- a/.gitignore +++ b/.gitignore @@ -222,3 +222,4 @@ generated-days/ # Node dependencies node_modules/ +.claude/settings.local.json From 300e9ec72740ae4fe78a8366e4b25a2b9db22621 Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 18:32:43 +0000 Subject: [PATCH 08/12] Review fixes for docker-health-v2: remove dual-write, add scan duration metric MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop _best_effort_write_health_to_influx from /api/health-status — health-monitor sidecar is the authoritative writer; frontend calls were creating duplicate points - Add depends_on: data-downloader-api to health-monitor in docker-compose so it doesn't log API errors on cold start - Replace meaningless runs_per_minute (run count / poll interval) with last_scan_duration_seconds, computed from started_at/finished_at already tracked in scanner_status.json — measures how long the slicks InfluxDB scan actually took - Expose last_scan_duration_seconds in /api/health-status response - Add health-monitor env vars to .env.example --- installer/data-downloader/.env.example | 8 +++ installer/data-downloader/backend/app.py | 59 +++---------------- installer/data-downloader/backend/services.py | 1 - installer/data-downloader/backend/storage.py | 26 ++++---- installer/docker-compose.yml | 2 + installer/health-monitor/README.md | 4 +- installer/health-monitor/monitor.py | 10 ++-- 7 files changed, 38 insertions(+), 72 deletions(-) diff --git a/installer/data-downloader/.env.example b/installer/data-downloader/.env.example index 26c5173..17b08a0 100644 --- a/installer/data-downloader/.env.example +++ b/installer/data-downloader/.env.example @@ -13,3 +13,11 @@ SENSOR_LOOKBACK_DAYS=30 SCAN_INTERVAL_SECONDS=3600 VITE_API_BASE_URL=http://localhost:8000 ALLOWED_ORIGINS=http://localhost:3000,http://localhost:5173,https://daq.westernformularacing.org + +# Health monitor (optional — defaults work for standard docker-compose stack) +INFLUXDB_HEALTH_DATABASE=monitoring +HEALTH_MONITOR_INTERVAL_SECONDS=60 +HEALTH_MONITOR_INFLUXDB_CONTAINER=influxdb3 +HEALTH_MONITOR_SCANNER_CONTAINER=data-downloader-scanner +HEALTH_MONITOR_SCANNER_API_URL=http://data-downloader-api:8000 +HEALTH_MONITOR_INFLUXDB_VOLUME_SUFFIX=influxdb3-data diff --git a/installer/data-downloader/backend/app.py b/installer/data-downloader/backend/app.py index 7bbf0f5..6d00989 100644 --- a/installer/data-downloader/backend/app.py +++ b/installer/data-downloader/backend/app.py @@ -1,14 +1,12 @@ from __future__ import annotations from datetime import datetime, timezone -import os import logging from typing import List import docker -from influxdb_client_3 import InfluxDBClient3, Point -from fastapi import BackgroundTasks, FastAPI, HTTPException +from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse from pydantic import BaseModel @@ -48,11 +46,6 @@ def healthcheck() -> dict: return {"status": "ok"} -def _now_ns() -> int: - """Current unix time in nanoseconds (Influx write_precision='ns').""" - return int(datetime.now(timezone.utc).timestamp() * 1_000_000_000) - - def _docker_container_running(container_name: str) -> bool: """Return True if Docker container is in Running state.""" try: @@ -65,55 +58,17 @@ def _docker_container_running(container_name: str) -> bool: raise RuntimeError(f"Docker inspection failed for {container_name}: {e}") from e -def _best_effort_write_health_to_influx(up_by_tag: dict[str, bool]) -> None: - """ - Best-effort write of monitor.container.{up} points into InfluxDB. - Endpoint responses must not fail if InfluxDB is down. - """ - influx_url = (os.getenv("INFLUXDB_URL") or os.getenv("INFLUX_URL") or "").strip().rstrip("/") - token = ( - os.getenv("INFLUXDB_TOKEN") - or os.getenv("INFLUXDB_ADMIN_TOKEN") - or os.getenv("INFLUX_TOKEN") - or os.getenv("INFLUX_ADMIN_TOKEN") - or "" - ) - database = os.getenv("INFLUXDB_HEALTH_DATABASE", "monitoring") - - if not influx_url or not token: - return - - try: - ts_ns = _now_ns() - with InfluxDBClient3(host=influx_url, token=token, database=database) as client: - for container_tag, is_up in up_by_tag.items(): - p = ( - Point("monitor.container") - .tag("container", container_tag) - .field("up", bool(is_up)) - .time(ts_ns, write_precision="ns") - ) - client.write(p) - except Exception: - # Intentionally ignore all write failures; health endpoint should still answer. - logger.debug("InfluxDB write failed during health-status request", exc_info=True) - - @app.get("/api/health-status") -def health_status(background_tasks: BackgroundTasks) -> dict: - """Container health derived from live Docker inspection (with best-effort Influx writes).""" +def health_status() -> dict: + """Container health derived from live Docker inspection.""" try: - up_by_tag = { - "influxdb3": _docker_container_running("influxdb3"), - "data-downloader-scanner": _docker_container_running("data-downloader-scanner"), - } - background_tasks.add_task(_best_effort_write_health_to_influx, up_by_tag) - + scanner_status = service.get_scanner_status() now = datetime.now(timezone.utc).isoformat() return { - "influxdb3": bool(up_by_tag["influxdb3"]), - "scanner": bool(up_by_tag["data-downloader-scanner"]), + "influxdb3": _docker_container_running("influxdb3"), + "scanner": _docker_container_running("data-downloader-scanner"), "last_updated": now, + "last_scan_duration_seconds": scanner_status.get("last_scan_duration_seconds"), } except HTTPException: raise diff --git a/installer/data-downloader/backend/services.py b/installer/data-downloader/backend/services.py index 06c7ae2..a04f7f7 100644 --- a/installer/data-downloader/backend/services.py +++ b/installer/data-downloader/backend/services.py @@ -162,7 +162,6 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: success=True, runs_count=len(runs_list), sensors_count=len(sensors_list), - interval_seconds=self.settings.periodic_interval_seconds, ) return { diff --git a/installer/data-downloader/backend/storage.py b/installer/data-downloader/backend/storage.py index 97d2e97..4bfbcc2 100644 --- a/installer/data-downloader/backend/storage.py +++ b/installer/data-downloader/backend/storage.py @@ -162,8 +162,7 @@ def __init__(self, data_dir: Path): "error_count": 0, "last_scan_runs_count": None, "last_scan_sensors_count": None, - "scan_interval_seconds": None, - "events_processed_per_minute": None, + "last_scan_duration_seconds": None, } self.store = JSONStore(data_dir / "scanner_status.json", default) @@ -190,32 +189,35 @@ def mark_finish( error: str | None = None, runs_count: int | None = None, sensors_count: int | None = None, - interval_seconds: int | None = None, ) -> dict: payload = self.store.read() + now = now_iso() payload.update( { "scanning": False, - "finished_at": now_iso(), + "finished_at": now, "last_result": "success" if success else "error", } ) if success: payload.pop("error", None) - payload["last_successful_job_timestamp"] = now_iso() + payload["last_successful_job_timestamp"] = now if runs_count is not None: payload["last_scan_runs_count"] = runs_count if sensors_count is not None: payload["last_scan_sensors_count"] = sensors_count - if interval_seconds is not None: - payload["scan_interval_seconds"] = interval_seconds - if runs_count is not None and interval_seconds > 0: - payload["events_processed_per_minute"] = round( - (runs_count * 60.0) / interval_seconds, 2 - ) + started_at = payload.get("started_at") + if started_at: + try: + duration = ( + datetime.fromisoformat(now) - datetime.fromisoformat(started_at) + ).total_seconds() + payload["last_scan_duration_seconds"] = round(duration, 2) + except ValueError: + pass else: payload["error"] = error or "scan failed" payload["error_count"] = payload.get("error_count", 0) + 1 - payload["updated_at"] = now_iso() + payload["updated_at"] = now self.store.write(payload) return payload diff --git a/installer/docker-compose.yml b/installer/docker-compose.yml index 6878753..09271f0 100644 --- a/installer/docker-compose.yml +++ b/installer/docker-compose.yml @@ -250,6 +250,8 @@ services: depends_on: influxdb3: condition: service_healthy + data-downloader-api: + condition: service_started data-downloader-frontend: build: diff --git a/installer/health-monitor/README.md b/installer/health-monitor/README.md index ad3f9d1..cfdad16 100644 --- a/installer/health-monitor/README.md +++ b/installer/health-monitor/README.md @@ -6,11 +6,11 @@ Python service that periodically collects Docker container and application metri - **Every 60 seconds** (configurable): - **InfluxDB container** (`influxdb3`): Up/Down, restart count, disk usage of the data volume, write latency (and write errors if any). - - **Scanner container** (`data-downloader-scanner`): Up/Down, and application metrics from the data-downloader API: `events_processed_per_minute`, `last_successful_job_timestamp`, `error_count`. + - **Scanner container** (`data-downloader-scanner`): Up/Down, and application metrics from the data-downloader API: `last_scan_duration_seconds`, `last_successful_job_timestamp`, `error_count`. - Writes all metrics to InfluxDB 3 in the **`monitoring`** database (configurable via `INFLUXDB_HEALTH_DATABASE`): - **`monitor.container`** — Docker-level metrics (up, restart_count, disk_usage, write_latency) with tag `container` - - **`monitor.service`** — Application-level metrics (events_processed_per_minute, last_successful_job_timestamp, error_count) with tag `service` + - **`monitor.service`** — Application-level metrics (last_scan_duration_seconds, last_successful_job_timestamp, error_count) with tag `service` ## Requirements diff --git a/installer/health-monitor/monitor.py b/installer/health-monitor/monitor.py index 481c4e9..022d208 100644 --- a/installer/health-monitor/monitor.py +++ b/installer/health-monitor/monitor.py @@ -105,7 +105,7 @@ def collect_scanner_metrics(client: docker.DockerClient) -> dict: """Collect metrics for the scanner container: status and app metrics from API.""" out = { "up": False, - "events_processed_per_minute": None, + "last_scan_duration_seconds": None, "last_successful_job_timestamp": None, "error_count": None, "api_error": None, @@ -129,7 +129,7 @@ def collect_scanner_metrics(client: docker.DockerClient) -> dict: ) r.raise_for_status() data = r.json() - out["events_processed_per_minute"] = data.get("events_processed_per_minute") + out["last_scan_duration_seconds"] = data.get("last_scan_duration_seconds") out["last_successful_job_timestamp"] = data.get("last_successful_job_timestamp") out["error_count"] = data.get("error_count") except requests.RequestException as e: @@ -188,10 +188,10 @@ def write_health_to_influx(influx_metrics: dict, scanner_metrics: dict) -> None: .field("up", scanner_metrics["up"]) .time(ts_ns, write_precision="ns") ) - if scanner_metrics.get("events_processed_per_minute") is not None: + if scanner_metrics.get("last_scan_duration_seconds") is not None: p_scanner_service = p_scanner_service.field( - "events_processed_per_minute", - scanner_metrics["events_processed_per_minute"], + "last_scan_duration_seconds", + scanner_metrics["last_scan_duration_seconds"], ) if scanner_metrics.get("last_successful_job_timestamp"): p_scanner_service = p_scanner_service.field( From c3fe20dff906c61c43e363771bef20012f0f7151 Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 18:52:25 +0000 Subject: [PATCH 09/12] Fix missing BackgroundTasks import removed during health-status cleanup --- installer/data-downloader/backend/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/data-downloader/backend/app.py b/installer/data-downloader/backend/app.py index 6d00989..84f2e31 100644 --- a/installer/data-downloader/backend/app.py +++ b/installer/data-downloader/backend/app.py @@ -6,7 +6,7 @@ import docker -from fastapi import FastAPI, HTTPException +from fastapi import BackgroundTasks, FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse from pydantic import BaseModel From c9f8b0074bbabd698157dee708220f2aa3b8eacf Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 19:13:34 +0000 Subject: [PATCH 10/12] Updates to Data Downloader, allow past season scans --- installer/data-downloader/backend/app.py | 5 +- installer/data-downloader/backend/config.py | 15 +++--- .../backend/periodic_worker.py | 5 +- installer/data-downloader/backend/services.py | 47 ++++++------------- .../data-downloader/frontend/src/App.tsx | 16 ++++++- installer/data-downloader/frontend/src/api.ts | 5 +- 6 files changed, 46 insertions(+), 47 deletions(-) diff --git a/installer/data-downloader/backend/app.py b/installer/data-downloader/backend/app.py index 84f2e31..2e6b86b 100644 --- a/installer/data-downloader/backend/app.py +++ b/installer/data-downloader/backend/app.py @@ -105,8 +105,9 @@ def save_note(key: str, payload: NotePayload, season: str | None = None) -> dict @app.post("/api/scan") -def trigger_scan(background_tasks: BackgroundTasks) -> dict: - background_tasks.add_task(service.run_full_scan, "manual") +def trigger_scan(background_tasks: BackgroundTasks, season: str | None = None) -> dict: + season_names = [season] if season else None + background_tasks.add_task(service.run_full_scan, "manual", season_names) return {"status": "scheduled"} diff --git a/installer/data-downloader/backend/config.py b/installer/data-downloader/backend/config.py index 653f70e..27683d3 100644 --- a/installer/data-downloader/backend/config.py +++ b/installer/data-downloader/backend/config.py @@ -13,17 +13,18 @@ def _parse_origins(raw: str | None) -> List[str]: class SeasonConfig(BaseModel): - name: str # e.g. "WFR25" - year: int # e.g. 2025 + name: str # e.g. "WFR25" + year: int # e.g. 2025 database: str # e.g. "WFR25" - color: str | None = None # e.g. "222 76 153" + table: str # e.g. "WFR25" — InfluxDB table name inside the database + color: str | None = None def _parse_seasons(raw: str | None) -> List[SeasonConfig]: """Parse SEASONS env var: "WFR25:2025:222 76 153,WFR26:2026:...".""" if not raw: # Default fallback if not set - return [SeasonConfig(name="WFR25", year=2025, database="WFR25", color="#DE4C99")] + return [SeasonConfig(name="WFR25", year=2025, database="WFR25", table="WFR25", color="#DE4C99")] seasons = [] for part in raw.split(","): @@ -45,13 +46,13 @@ def _parse_seasons(raw: str | None) -> List[SeasonConfig]: color = parts[2] if len(parts) > 2 else None - # Assume DB name matches Season Name - seasons.append(SeasonConfig(name=name, year=year, database=name, color=color)) + # DB and table name both match season name by convention (WFR25→WFR25, WFR26→WFR26) + seasons.append(SeasonConfig(name=name, year=year, database=name, table=name, color=color)) except ValueError: continue if not seasons: - return [SeasonConfig(name="WFR25", year=2025, database="WFR25")] + return [SeasonConfig(name="WFR25", year=2025, database="WFR25", table="WFR25")] # Sort by year descending (newest first) seasons.sort(key=lambda s: s.year, reverse=True) diff --git a/installer/data-downloader/backend/periodic_worker.py b/installer/data-downloader/backend/periodic_worker.py index b2e18a5..9b91926 100644 --- a/installer/data-downloader/backend/periodic_worker.py +++ b/installer/data-downloader/backend/periodic_worker.py @@ -24,8 +24,9 @@ async def run_worker(): while True: try: - logging.info("Running scheduled scan...") - service.run_full_scan(source="periodic") + active_season = settings.seasons[0] # sorted descending by year; first = active + logging.info(f"Running scheduled scan for active season: {active_season.name}") + service.run_full_scan(source="periodic", season_names=[active_season.name]) logging.info("Finished scheduled scan.") if daily_time: diff --git a/installer/data-downloader/backend/services.py b/installer/data-downloader/backend/services.py index a04f7f7..2ff5a7b 100644 --- a/installer/data-downloader/backend/services.py +++ b/installer/data-downloader/backend/services.py @@ -80,14 +80,15 @@ def get_seasons(self) -> List[dict]: for s in self.settings.seasons ] - def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: + def run_full_scan(self, source: str = "manual", season_names: list[str] | None = None) -> Dict[str, dict]: self.status_repo.mark_start(source) results = {} errors = [] - + try: - # Sort seasons by year descending to ensure most recent is scanned first sorted_seasons = sorted(self.settings.seasons, key=lambda s: s.year, reverse=True) + if season_names is not None: + sorted_seasons = [s for s in sorted_seasons if s.name in season_names] for season in sorted_seasons: try: logger.info(f"Scanning season {season.name} (DB: {season.database})...") @@ -97,7 +98,7 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: host=self.settings.influx_host, token=self.settings.influx_token, database=season.database, - table=f"{self.settings.influx_schema}.{self.settings.influx_table}", + table=f"{self.settings.influx_schema}.{season.table}", year=season.year, bin_size=self.settings.scanner_bin, include_counts=self.settings.scanner_include_counts, @@ -116,7 +117,7 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: token=self.settings.influx_token, database=season.database, schema=self.settings.influx_schema, - table=self.settings.influx_table, + table=season.table, window_days=self.settings.sensor_window_days, lookback_days=self.settings.sensor_lookback_days, fallback_start=fallback_start, @@ -136,38 +137,18 @@ def run_full_scan(self, source: str = "manual") -> Dict[str, dict]: errors.append(f"{season.name}: {str(e)}") # Continue scanning other seasons even if one fails + total_runs = sum(v["runs"] for v in results.values()) + total_sensors = sum(v["sensors"] for v in results.values()) if errors: self.status_repo.mark_finish(success=False, error="; ".join(errors)) else: - self.status_repo.mark_finish(success=True) - - sensors = fetch_unique_sensors( - SensorQueryConfig( - host=self.settings.influx_host, - token=self.settings.influx_token, - database=self.settings.influx_database, - schema=self.settings.influx_schema, - table=self.settings.influx_table, - window_days=self.settings.sensor_window_days, - lookback_days=self.settings.sensor_lookback_days, - fallback_start=fallback_start, - fallback_end=fallback_end, + self.status_repo.mark_finish( + success=True, + runs_count=total_runs, + sensors_count=total_sensors, ) - ) - sensors_payload = self.sensors_repo.write_sensors(sensors) - - runs_list = runs_payload.get("runs", []) - sensors_list = sensors_payload.get("sensors", []) - self.status_repo.mark_finish( - success=True, - runs_count=len(runs_list), - sensors_count=len(sensors_list), - ) - - return { - "runs": runs_payload, - "sensors": sensors_payload, - } + + return results except Exception as exc: self.status_repo.mark_finish(success=False, error=str(exc)) raise diff --git a/installer/data-downloader/frontend/src/App.tsx b/installer/data-downloader/frontend/src/App.tsx index 8a54cb9..36311d4 100644 --- a/installer/data-downloader/frontend/src/App.tsx +++ b/installer/data-downloader/frontend/src/App.tsx @@ -24,6 +24,7 @@ export default function App() { const [noteDrafts, setNoteDrafts] = useState>({}); const [savingKey, setSavingKey] = useState(null); const [scanState, setScanState] = useState("idle"); + const [scanSeason, setScanSeason] = useState(""); const [downloaderSelection, setDownloaderSelection] = useState(null); const [scannerStatus, setScannerStatus] = useState(null); const sensorsSectionRef = useRef(null); @@ -43,6 +44,7 @@ export default function App() { if (seasonsList.length > 0 && !currentSeason) { currentSeason = seasonsList[0].name; setSelectedSeason(currentSeason); + setScanSeason(currentSeason); } } @@ -114,7 +116,7 @@ export default function App() { updated_at: new Date().toISOString() })); try { - await triggerScan(); + await triggerScan(scanSeason || undefined); setScanState("success"); if (typeof window !== "undefined") { window.setTimeout(() => { @@ -259,6 +261,18 @@ export default function App() { )}
+ {seasons.length > 1 && ( + + )} diff --git a/installer/data-downloader/frontend/src/api.ts b/installer/data-downloader/frontend/src/api.ts index bb585cd..437c4af 100644 --- a/installer/data-downloader/frontend/src/api.ts +++ b/installer/data-downloader/frontend/src/api.ts @@ -52,8 +52,9 @@ export function fetchScannerStatus(): Promise { return request("/api/scanner-status"); } -export function triggerScan(): Promise<{ status: string }> { - return request("/api/scan", { method: "POST" }); +export function triggerScan(season?: string): Promise<{ status: string }> { + const query = season ? `?season=${encodeURIComponent(season)}` : ""; + return request(`/api/scan${query}`, { method: "POST" }); } export function updateNote(key: string, note: string, season?: string): Promise { From 66c80ef155c40069f53988ec1bf9aee26ffc4fe6 Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 19:27:14 +0000 Subject: [PATCH 11/12] Bump slicks to 0.2.1 and fix ghost run accumulation in storage Old pre-slicks entries with non-round-hour timestamps would persist forever because merge_scanned_runs kept all vanished runs, not just ones with user notes. Now only preserves vanished entries that have a note, so noise artifacts are cleaned out on each fresh scan. --- installer/data-downloader/backend/requirements.txt | 2 +- installer/data-downloader/backend/storage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/data-downloader/backend/requirements.txt b/installer/data-downloader/backend/requirements.txt index 56c1f1b..57e0186 100644 --- a/installer/data-downloader/backend/requirements.txt +++ b/installer/data-downloader/backend/requirements.txt @@ -2,5 +2,5 @@ fastapi==0.115.4 uvicorn[standard]==0.23.2 influxdb3-python==0.16.0 pydantic==2.9.2 -slicks>=0.2.0 +slicks>=0.2.1 docker>=7.0.0 diff --git a/installer/data-downloader/backend/storage.py b/installer/data-downloader/backend/storage.py index 4bfbcc2..869a22a 100644 --- a/installer/data-downloader/backend/storage.py +++ b/installer/data-downloader/backend/storage.py @@ -69,7 +69,7 @@ def merge_scanned_runs(self, scanned: List[dict]) -> dict: # Keep runs that vanished but still have notes to preserve manual metadata for key, run in existing.items(): - if key not in merged: + if key not in merged and run.get("note"): merged[key] = run runs_list = sorted( From de9da8fbba3d643f396c17b811b75a4fcb60a6e0 Mon Sep 17 00:00:00 2001 From: "WFR DAQ Server (ovh)" Date: Fri, 20 Mar 2026 19:28:55 +0000 Subject: [PATCH 12/12] Fix CI slicks version typo: 2.0.1 -> 0.2.1 --- .github/workflows/sandbox-integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sandbox-integration.yml b/.github/workflows/sandbox-integration.yml index fea5f70..c834b43 100644 --- a/.github/workflows/sandbox-integration.yml +++ b/.github/workflows/sandbox-integration.yml @@ -22,7 +22,7 @@ jobs: python-version: "3.11" - name: Install slicks - run: pip install "slicks>=2.0.1" + run: pip install "slicks>=0.2.1" - name: Discover sensors for September 2025 env: