diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index ea3f16c..a2dd0eb 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -12,11 +12,12 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Commands +- Run `uv run task install` to install all dependencies - Run `uv run task lint` to lint the code using Ruff - Run `uv run task format` to auto-fix formatting issues with Ruff - Run `uv run task type` to run type checking -- Run `uv run task test` to run the full test suite including linting, type checking, and tests - Run `uv run task spec` to run only the pytest tests +- Run `uv run task test` to run the full test suite (lint/type/spec) - Run `uv run pytest path/to/test.py` to run a single test file - Run `uv run pytest -k "test name"` to run a single test by name diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index 6f1c654..0000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(uv add:*)", - "Bash(gh api:*)" - ] - } -} diff --git a/.github/ISSUE_TEMPLATE/01-bug-report.yaml b/.github/ISSUE_TEMPLATE/01-bug-report.yaml new file mode 100644 index 0000000..2bf62b1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/01-bug-report.yaml @@ -0,0 +1,30 @@ +name: "🐞 Bug Report" +description: Report an issue or possible bug +labels: [] +assignees: [] +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to file a bug report! Please fill out this form as completely as possible. + - type: input + id: version + attributes: + label: What version are you using? + placeholder: 1.0.0 + validations: + required: true + - type: textarea + id: description + attributes: + label: Describe the Bug + description: A clear and concise description of what the bug is. 
+ validations: + required: true + - type: checkboxes + id: contribution + attributes: + label: Participation + options: + - label: I am willing to submit a pull request for this issue. + required: false diff --git a/.github/ISSUE_TEMPLATE/02-general-issue.yaml b/.github/ISSUE_TEMPLATE/02-general-issue.yaml new file mode 100644 index 0000000..2a6fbe3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/02-general-issue.yaml @@ -0,0 +1,30 @@ +name: "🌳 General Issue" +description: Project, website, or documentation related issue or improvement +labels: [] +assignees: [] +body: + - type: markdown + attributes: + value: | + Thank you for taking the time to improve the project! Please fill out this form as completely as possible. + - type: input + id: version + attributes: + label: What version are you using? + placeholder: 1.0.0 + validations: + required: true + - type: textarea + id: description + attributes: + label: Describe the Issue + description: A clear and concise description of what the issue or possible improvement is. + validations: + required: true + - type: checkboxes + id: contribution + attributes: + label: Participation + options: + - label: I am willing to submit a pull request for this issue. + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..bd2a8db --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,14 @@ +blank_issues_enabled: false +contact_links: + - name: 👀 Explore Discussions + url: https://github.com/orgs/fairspec/discussions + about: Before opening a discussion, please check if the topic has already been discussed. Please vote on existing discussions to show your support. 
+ - name: 💡 Feature Request + url: https://github.com/orgs/fairspec/discussions/new?category=ideas + about: Suggest an improvement you'd like to see added to Fairspec Python + - name: 🚀 Implementation + url: https://github.com/orgs/fairspec/discussions/new?category=show-and-tell + about: Share your Fairspec Python related project with the community + - name: 💁 Question + url: https://github.com/orgs/fairspec/discussions/new?category=q-a + about: Ask a question about Fairspec Python diff --git a/.github/codecov.yaml b/.github/codecov.yaml new file mode 100644 index 0000000..dff1212 --- /dev/null +++ b/.github/codecov.yaml @@ -0,0 +1,11 @@ +comment: no + +coverage: + range: 80..90 + status: + project: + default: + informational: true + patch: + default: + informational: true diff --git a/.github/workflows/general.yaml b/.github/workflows/general.yaml new file mode 100644 index 0000000..765dc5e --- /dev/null +++ b/.github/workflows/general.yaml @@ -0,0 +1,103 @@ +name: general + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test-linux: + runs-on: ubuntu-latest + + permissions: + id-token: write + + steps: + - name: Checkout Repo + uses: actions/checkout@v5 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Setup uv + uses: astral-sh/setup-uv@v5 + - name: Install Dependencies + run: uv run task install + - name: Test Packages + run: uv run task test + - name: Upload Coverage + uses: codecov/codecov-action@v5 + with: + use_oidc: true + + test-macos: + runs-on: macos-latest + + steps: + - name: Checkout Repo + uses: actions/checkout@v5 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Setup uv + uses: astral-sh/setup-uv@v5 + - name: Install Dependencies + run: uv run task install + - name: Test Packages + run: uv run task test + + test-windows: + runs-on: windows-latest + + steps: + - name: Checkout Repo + uses: actions/checkout@v5 + - name: 
Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Setup uv + uses: astral-sh/setup-uv@v5 + - name: Install Dependencies + run: uv run task install + - name: Test Packages + run: uv run task test + + release: + needs: [test-linux, test-macos, test-windows] + environment: release + runs-on: ubuntu-latest + if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/') + + outputs: + created: ${{ steps.release.outputs.new_release_published }} + version: ${{ steps.release.outputs.new_release_version }} + + permissions: + id-token: write + contents: write + issues: write + pull-requests: write + + steps: + - name: Checkout Repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Setup uv + uses: astral-sh/setup-uv@v5 + - name: Install Dependencies + run: uv run task install + - name: Release Packages + uses: python-semantic-release/python-semantic-release@v9 + id: release + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 42f20c5..46650cc 100644 --- a/.gitignore +++ b/.gitignore @@ -214,3 +214,16 @@ __marimo__/ # Streamlit .streamlit/secrets.toml + +# Node +node_modules/ +jspm_packages/ +.lock-wscript +build/Release +.node_repl_history +*.tgz +.npm +*.so + +# User +/.claude/settings.local.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..895c70a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: local + hooks: + - id: test + name: test + entry: uv run task test + language: system + always_run: true + pass_filenames: false + stages: [pre-push] diff --git a/dataset/README.md b/dataset/README.md new file mode 100644 index 0000000..5facb78 --- /dev/null +++ b/dataset/README.md @@ -0,0 +1,3 @@ +# fairspec-dataset + +Fairspec Python is a fast data management framework built on top of the Fairspec standard and 
Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). diff --git a/dataset/fairspec_dataset/__init__.py b/dataset/fairspec_dataset/__init__.py new file mode 100644 index 0000000..ea2cee6 --- /dev/null +++ b/dataset/fairspec_dataset/__init__.py @@ -0,0 +1,92 @@ +from .actions.dataset.basepath import get_common_local_basepath, get_dataset_basepath +from .actions.dataset.merge import merge_datasets +from .actions.file.copy import copy_file +from .actions.file.describe import describe_file +from .models.file import FileDescription +from .actions.file.infer import infer_bytes, infer_hash, infer_integrity, infer_textual +from .actions.file.load import load_file +from .actions.file.path import assert_local_path_vacant, get_is_local_path_exist +from .actions.file.prefetch import prefetch_file, prefetch_files +from .actions.file.save import save_file +from .actions.file.temp import get_temp_file_path, write_temp_file +from .actions.file.validate import validate_file +from .actions.folder.create import create_folder +from .actions.folder.temp import get_temp_folder_path +from .actions.resource.save import SaveFileCallback, SaveFileProps, save_resource_files +from .actions.stream.concat import concat_file_streams +from .actions.stream.load import load_file_stream +from .actions.stream.save import save_file_stream +from .models.dataset import SaveDatasetOptions +from .models.file_dialect import InferFileDialectOptions +from .models.dataset import SaveDatasetResult +from .plugin import DatasetPlugin +from .plugins.descriptor import DescriptorPlugin +from .plugins.ckan import CkanPlugin, load_dataset_from_ckan, save_dataset_to_ckan +from .plugins.folder import ( + FolderPlugin, + load_dataset_from_folder, + save_dataset_to_folder, +) +from .plugins.github import ( + GithubPlugin, + 
load_dataset_from_github, + save_dataset_to_github, +) +from .plugins.zenodo import ( + ZenodoPlugin, + load_dataset_from_zenodo, + save_dataset_to_zenodo, +) +from .plugins.zip import ZipPlugin, load_dataset_from_zip, save_dataset_to_zip +from fairspec_metadata.plugin import MetadataPlugin + +__all__ = [ + "CkanPlugin", + "DatasetPlugin", + "DescriptorPlugin", + "FileDescription", + "FolderPlugin", + "GithubPlugin", + "InferFileDialectOptions", + "MetadataPlugin", + "SaveDatasetOptions", + "SaveDatasetResult", + "SaveFileCallback", + "SaveFileProps", + "ZenodoPlugin", + "ZipPlugin", + "assert_local_path_vacant", + "concat_file_streams", + "copy_file", + "create_folder", + "describe_file", + "get_common_local_basepath", + "get_dataset_basepath", + "get_is_local_path_exist", + "get_temp_file_path", + "get_temp_folder_path", + "infer_bytes", + "infer_hash", + "infer_integrity", + "infer_textual", + "load_dataset_from_ckan", + "load_dataset_from_folder", + "load_dataset_from_github", + "load_dataset_from_zenodo", + "load_dataset_from_zip", + "load_file", + "load_file_stream", + "merge_datasets", + "prefetch_file", + "prefetch_files", + "save_dataset_to_ckan", + "save_dataset_to_folder", + "save_dataset_to_github", + "save_dataset_to_zenodo", + "save_dataset_to_zip", + "save_file", + "save_file_stream", + "save_resource_files", + "validate_file", + "write_temp_file", +] diff --git a/dataset/fairspec_dataset/actions/dataset/basepath.py b/dataset/fairspec_dataset/actions/dataset/basepath.py new file mode 100644 index 0000000..c90aa70 --- /dev/null +++ b/dataset/fairspec_dataset/actions/dataset/basepath.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +from fairspec_metadata import get_basepath, get_data_paths, get_is_remote_path +from fairspec_metadata.actions.path.general import safe_relpath + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + + +def get_dataset_basepath(dataset: Dataset) -> str | None: + 
paths: list[str] = [] + + for resource in dataset.resources or []: + resource_paths = get_data_paths(resource) + paths.extend(resource_paths) + + return get_common_local_basepath(paths) + + +def get_common_local_basepath(paths: list[str]) -> str | None: + absolute_basepaths = [ + os.path.abspath(get_basepath(path)) + for path in paths + if not get_is_remote_path(path) + ] + + if not absolute_basepaths: + return None + + segment_table = [ + [segment or "/" for segment in path.split(os.sep)] + for path in absolute_basepaths + ] + + column = 0 + segments: list[str] = [] + + while True: + segment_column = [ + row[column] if column < len(row) else None for row in segment_table + ] + unique_segments = set(segment_column) + + if len(unique_segments) != 1: + break + if segment_column[0] is None: + break + + column += 1 + segments.append(segment_column[0]) + + if not segments: + raise ValueError("Cannot find common basepath") + + if segments[0].endswith(":"): + segments[0] += os.sep + + basepath = safe_relpath(os.path.join(*segments)) + return "" if basepath == "." 
else basepath diff --git a/dataset/fairspec_dataset/actions/dataset/basepath_spec.py b/dataset/fairspec_dataset/actions/dataset/basepath_spec.py new file mode 100644 index 0000000..7d3d259 --- /dev/null +++ b/dataset/fairspec_dataset/actions/dataset/basepath_spec.py @@ -0,0 +1,60 @@ +import pytest + +from .basepath import get_common_local_basepath + + +class TestGetCommonLocalBasepath: + @pytest.mark.parametrize( + "description, paths, expected", + [ + ( + "same directory different files", + ["data/table1.csv", "data/table2.csv"], + "data", + ), + ( + "nested directories", + ["data/nested/file1.csv", "data/nested/file2.csv", "data/file3.csv"], + "data", + ), + ( + "single path", + ["data/file.csv"], + "data", + ), + ( + "root level files", + ["file1.csv", "file2.csv"], + "", + ), + ( + "different top-level directories", + ["data1/file1.csv", "data2/file2.csv"], + "", + ), + ( + "empty paths array", + [], + None, + ), + ( + "some paths are remote", + ["https://example.com/table.csv", "data/table.csv"], + "data", + ), + ( + "all paths are remote", + [ + "https://example.com/table1.csv", + "https://example.com/table2.csv", + ], + None, + ), + ], + ) + def test_get_common_local_basepath(self, description, paths, expected): + result = get_common_local_basepath(paths) + if expected is None: + assert result is None + else: + assert result == expected diff --git a/dataset/fairspec_dataset/actions/dataset/merge.py b/dataset/fairspec_dataset/actions/dataset/merge.py new file mode 100644 index 0000000..ae75ebf --- /dev/null +++ b/dataset/fairspec_dataset/actions/dataset/merge.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from fairspec_metadata import load_dataset_descriptor +from fairspec_metadata import Dataset + + +def merge_datasets( + *, + system_dataset: Dataset, + user_dataset_path: str | None = None, +) -> Dataset: + system = system_dataset.model_dump(by_alias=True, exclude_none=True) + + user_dataset = ( + 
load_dataset_descriptor(user_dataset_path).model_dump( + by_alias=True, exclude_none=True + ) + if user_dataset_path + else None + ) + + merged = {**system, **user_dataset} if user_dataset else {**system} + return Dataset(**merged) diff --git a/dataset/fairspec_dataset/actions/file/copy.py b/dataset/fairspec_dataset/actions/file/copy.py new file mode 100644 index 0000000..1616940 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/copy.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from fairspec_dataset.actions.stream.load import load_file_stream +from fairspec_dataset.actions.stream.save import save_file_stream + + +def copy_file( + *, + source_path: str, + target_path: str, + max_bytes: int | None = None, +) -> None: + stream = load_file_stream(source_path, max_bytes=max_bytes) + save_file_stream(stream, path=target_path) diff --git a/dataset/fairspec_dataset/actions/file/copy_spec.py b/dataset/fairspec_dataset/actions/file/copy_spec.py new file mode 100644 index 0000000..f8462e8 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/copy_spec.py @@ -0,0 +1,69 @@ +import os + +from fairspec_dataset.actions.file.temp import write_temp_file + +from .copy import copy_file + + +class TestCopyFile: + def test_copies_file(self, tmp_path): + source = write_temp_file("test content") + target = str(tmp_path / "target.txt") + copy_file(source_path=source, target_path=target) + assert os.path.exists(target) + with open(target, encoding="utf-8") as f: + assert f.read() == "test content" + + def test_copies_exact_content(self, tmp_path): + content = "Hello, World! This is a test file." 
+ source = write_temp_file(content) + target = str(tmp_path / "copy.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == content + + def test_copies_binary_file(self, tmp_path): + binary_data = bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10]) + source = write_temp_file(binary_data) + target = str(tmp_path / "binary.bin") + copy_file(source_path=source, target_path=target) + with open(target, "rb") as f: + assert f.read() == binary_data + + def test_copies_empty_file(self, tmp_path): + source = write_temp_file("") + target = str(tmp_path / "empty.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == "" + + def test_copies_large_file(self, tmp_path): + content = "x" * 100000 + source = write_temp_file(content) + target = str(tmp_path / "large.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == content + + def test_copies_special_characters(self, tmp_path): + content = "Special characters: é, ñ, ü, ö, à, 中文, 日本語" + source = write_temp_file(content) + target = str(tmp_path / "special.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == content + + def test_copies_to_nested_directory(self, tmp_path): + source = write_temp_file("nested content") + target = str(tmp_path / "nested" / "dir" / "file.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == "nested content" + + def test_copies_with_newlines(self, tmp_path): + content = "Line 1\nLine 2\nLine 3\n" + source = write_temp_file(content) + target = str(tmp_path / "multiline.txt") + copy_file(source_path=source, target_path=target) + with open(target, encoding="utf-8") as f: + assert f.read() == content diff --git a/dataset/fairspec_dataset/actions/file/describe.py 
b/dataset/fairspec_dataset/actions/file/describe.py new file mode 100644 index 0000000..75df67d --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/describe.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from fairspec_metadata import Resource + +from fairspec_dataset.models.file import FileDescription + +from .infer import infer_bytes, infer_integrity, infer_textual +from .prefetch import prefetch_file + + +def describe_file( + path: str, + *, + hash_type: str = "sha256", +) -> FileDescription: + local_path = prefetch_file(path) + resource = Resource(data=local_path) + + return FileDescription( + bytes=infer_bytes(resource), + textual=infer_textual(resource), + integrity=infer_integrity(resource, hash_type=hash_type), + ) diff --git a/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file.yaml b/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file.yaml new file mode 100644 index 0000000..b7b1369 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - raw.githubusercontent.com + User-Agent: + - Python-urllib/3.12 + method: GET + uri: https://raw.githubusercontent.com/fairspec/fairspec-typescript/refs/heads/main/table/plugins/csv/actions/table/fixtures/table.csv + response: + body: + string: "id,name\n1,english\n2,\u4E2D\u6587\n" + headers: + Accept-Ranges: + - bytes + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - max-age=300 + Connection: + - close + Content-Length: + - '27' + Content-Security-Policy: + - default-src 'none'; style-src 'unsafe-inline'; sandbox + Content-Type: + - text/plain; charset=utf-8 + Cross-Origin-Resource-Policy: + - cross-origin + Date: + - Tue, 10 Feb 2026 15:25:06 GMT + ETag: + - 
'"f32e41a0928646326054d6fac4ec63789daf58505efb50237c672db52692e86e"' + Expires: + - Tue, 10 Feb 2026 15:30:06 GMT + Source-Age: + - '0' + Strict-Transport-Security: + - max-age=31536000 + Vary: + - Authorization,Accept-Encoding + Via: + - 1.1 varnish + X-Cache: + - MISS + X-Cache-Hits: + - '0' + X-Content-Type-Options: + - nosniff + X-Fastly-Request-ID: + - 53bfbb3d5d4a7589325b5a5e917af77c465ca0d7 + X-Frame-Options: + - deny + X-GitHub-Request-Id: + - 9622:3B543:C6F20F:DB1A96:698B4DD1 + X-Served-By: + - cache-lis1490031-LIS + X-Timer: + - S1770737106.006396,VS0,VE190 + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file_with_max_bytes.yaml b/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file_with_max_bytes.yaml new file mode 100644 index 0000000..989a2c5 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file_with_max_bytes.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - raw.githubusercontent.com + User-Agent: + - Python-urllib/3.12 + method: GET + uri: https://raw.githubusercontent.com/fairspec/fairspec-typescript/refs/heads/main/table/plugins/csv/actions/table/fixtures/table.csv + response: + body: + string: "id,name\n1,english\n2,\u4E2D\u6587\n" + headers: + Accept-Ranges: + - bytes + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - max-age=300 + Connection: + - close + Content-Length: + - '27' + Content-Security-Policy: + - default-src 'none'; style-src 'unsafe-inline'; sandbox + Content-Type: + - text/plain; charset=utf-8 + Cross-Origin-Resource-Policy: + - cross-origin + Date: + - Tue, 10 Feb 2026 15:25:06 GMT + ETag: + - '"f32e41a0928646326054d6fac4ec63789daf58505efb50237c672db52692e86e"' + Expires: + - Tue, 10 Feb 2026 
15:30:06 GMT + Source-Age: + - '0' + Strict-Transport-Security: + - max-age=31536000 + Vary: + - Authorization,Accept-Encoding + Via: + - 1.1 varnish + X-Cache: + - HIT + X-Cache-Hits: + - '1' + X-Content-Type-Options: + - nosniff + X-Fastly-Request-ID: + - 4a03a62d3828c8347f86ca6cc586a7ad070d6da8 + X-Frame-Options: + - deny + X-GitHub-Request-Id: + - 9622:3B543:C6F20F:DB1A96:698B4DD1 + X-Served-By: + - cache-lis1490027-LIS + X-Timer: + - S1770737107.574729,VS0,VE2 + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/dataset/fairspec_dataset/actions/file/infer.py b/dataset/fairspec_dataset/actions/file/infer.py new file mode 100644 index 0000000..ff5cb40 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/infer.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import hashlib +import os +from typing import TYPE_CHECKING + +from charset_normalizer import from_bytes + +from fairspec_metadata import Integrity, get_data_first_path +from fairspec_metadata import IntegrityType + +from fairspec_dataset.actions.file.load import load_file +from fairspec_dataset.actions.file.prefetch import prefetch_files +from fairspec_dataset.actions.stream.concat import concat_file_streams +from fairspec_dataset.actions.stream.load import load_file_stream + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def infer_textual( + resource: Resource, + *, + sample_bytes: int = 10_000, +) -> bool: + first_path = get_data_first_path(resource) + if not first_path: + return False + + buffer = load_file(first_path, max_bytes=sample_bytes) + + if len(buffer) == 0: + return True + + if _is_binary(buffer): + return False + + try: + buffer.decode("utf-8") + return True + except UnicodeDecodeError: + pass + + results = from_bytes(buffer) + best = results.best() + if best is not None: + encoding = best.encoding.lower() + return encoding in ("utf-8", "ascii") + + return False + + +def infer_integrity( + resource: Resource, + *, + 
hash_type: str = "sha256", +) -> Integrity | None: + hash_value = infer_hash(resource, hash_type=hash_type) + + if not hash_value: + return None + + return Integrity(type=IntegrityType(hash_type), hash=hash_value) + + +def infer_hash( + resource: Resource, + *, + hash_type: str = "sha256", +) -> str: + local_paths = prefetch_files(resource) + + if not local_paths: + return "" + + streams = [load_file_stream(path) for path in local_paths] + stream = concat_file_streams(streams) + + h = hashlib.new(hash_type) + h.update(stream.read()) + return h.hexdigest() + + +def infer_bytes(resource: Resource) -> int: + local_paths = prefetch_files(resource) + + total = 0 + for local_path in local_paths: + total += os.stat(local_path).st_size + + return total + + +def _is_binary(data: bytes) -> bool: + control_chars = set(range(0, 8)) | set(range(14, 32)) + return any(byte in control_chars for byte in data) diff --git a/dataset/fairspec_dataset/actions/file/infer_spec.py b/dataset/fairspec_dataset/actions/file/infer_spec.py new file mode 100644 index 0000000..da38405 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/infer_spec.py @@ -0,0 +1,174 @@ +from fairspec_metadata import Resource + +from fairspec_dataset.actions.file.temp import write_temp_file + +from .infer import infer_bytes, infer_hash, infer_textual + + +class TestInferHash: + def test_computes_sha256_hash_by_default(self): + path = write_temp_file("Hello, World!") + result = infer_hash(Resource(data=path)) + assert len(result) == 64 + + def test_computes_md5_hash(self): + path = write_temp_file("Hello, World!") + result = infer_hash(Resource(data=path), hash_type="md5") + assert len(result) == 32 + + def test_computes_sha1_hash(self): + path = write_temp_file("Hello, World!") + result = infer_hash(Resource(data=path), hash_type="sha1") + assert len(result) == 40 + + def test_computes_sha512_hash(self): + path = write_temp_file("Hello, World!") + result = infer_hash(Resource(data=path), hash_type="sha512") + 
assert len(result) == 128 + + def test_consistent_hashes_for_same_content(self): + path = write_temp_file("Hello, World!") + result1 = infer_hash(Resource(data=path)) + result2 = infer_hash(Resource(data=path)) + assert result1 == result2 + + +class TestInferBytes: + def test_returns_file_size(self): + path = write_temp_file("Hello, World!") + result = infer_bytes(Resource(data=path)) + assert result == 13 + + def test_handles_empty_files(self): + path = write_temp_file("") + result = infer_bytes(Resource(data=path)) + assert result == 0 + + def test_handles_larger_files(self): + path = write_temp_file("x" * 10000) + result = infer_bytes(Resource(data=path)) + assert result == 10000 + + def test_handles_binary_data(self): + path = write_temp_file(bytes([0xFF, 0xD8, 0xFF, 0xE0])) + result = infer_bytes(Resource(data=path)) + assert result == 4 + + +class TestInferTextual: + def test_returns_true_for_utf8_text(self): + path = write_temp_file("Hello, World! This is UTF-8 text.") + assert infer_textual(Resource(data=path)) is True + + def test_returns_false_for_binary(self): + path = write_temp_file(bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00])) + assert infer_textual(Resource(data=path)) is False + + def test_uses_custom_sample_bytes(self): + path = write_temp_file("This is a test file with UTF-8 content.") + assert infer_textual(Resource(data=path), sample_bytes=20) is True + + def test_handles_large_text_files(self): + path = write_temp_file("Hello World! 
" * 1000) + assert infer_textual(Resource(data=path)) is True + + def test_handles_empty_files(self): + path = write_temp_file("") + assert infer_textual(Resource(data=path)) is True + + def test_handles_special_characters(self): + path = write_temp_file("Special: é, ñ, ü, ö, à") + assert infer_textual(Resource(data=path)) is True + + def test_returns_true_for_ascii(self): + path = write_temp_file(b"Simple ASCII text only") + assert infer_textual(Resource(data=path)) is True + + def test_returns_true_for_utf8_with_unicode(self): + path = write_temp_file("Héllo, Wörld! 你好 مرحبا 🌍") + assert infer_textual(Resource(data=path)) is True + + def test_returns_true_for_cyrillic(self): + path = write_temp_file("Привет мир") + assert infer_textual(Resource(data=path)) is True + + def test_returns_true_for_japanese(self): + path = write_temp_file("こんにちは世界") + assert infer_textual(Resource(data=path)) is True + + def test_returns_true_for_arabic(self): + path = write_temp_file("مرحبا بالعالم") + assert infer_textual(Resource(data=path)) is True + + def test_returns_false_for_latin1(self): + buffer = bytes( + [ + 0x43, + 0x61, + 0x66, + 0xE9, + 0x20, + 0x72, + 0xE9, + 0x73, + 0x75, + 0x6D, + 0xE9, + 0x20, + 0x6E, + 0x61, + 0xEF, + 0x76, + 0x65, + 0x20, + 0xE0, + 0x20, + 0x50, + 0x61, + 0x72, + 0x69, + 0x73, + 0x2E, + 0x20, + 0xC7, + 0x61, + 0x20, + 0x63, + 0x27, + 0x65, + 0x73, + 0x74, + 0x20, + 0x62, + 0x6F, + 0x6E, + 0x21, + ] + ) + path = write_temp_file(buffer) + assert infer_textual(Resource(data=path)) is False + + def test_returns_false_for_windows_1252(self): + buffer = bytes( + [ + 0x43, + 0x61, + 0x66, + 0xE9, + 0x20, + 0x6E, + 0x61, + 0xEF, + 0x76, + 0x65, + 0x20, + 0x72, + 0xE9, + 0x73, + 0x75, + 0x6D, + 0xE9, + ] + ) + path = write_temp_file(buffer) + assert infer_textual(Resource(data=path)) is False diff --git a/dataset/fairspec_dataset/actions/file/load.py b/dataset/fairspec_dataset/actions/file/load.py new file mode 100644 index 0000000..d47e5cd --- /dev/null 
+++ b/dataset/fairspec_dataset/actions/file/load.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from fairspec_dataset.actions.stream.load import load_file_stream + + +def load_file(path: str, *, max_bytes: int | None = None) -> bytes: + stream = load_file_stream(path, max_bytes=max_bytes) + return stream.read() diff --git a/dataset/fairspec_dataset/actions/file/path.py b/dataset/fairspec_dataset/actions/file/path.py new file mode 100644 index 0000000..ccbd750 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/path.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +import os + + +def get_is_local_path_exist(path: str) -> bool: + return os.path.exists(path) + + +def assert_local_path_vacant(path: str) -> None: + if os.path.exists(path): + raise FileExistsError(f'Path "{path}" already exists') diff --git a/dataset/fairspec_dataset/actions/file/prefetch.py b/dataset/fairspec_dataset/actions/file/prefetch.py new file mode 100644 index 0000000..66a5705 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/prefetch.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import get_data_paths, get_is_remote_path + +from fairspec_dataset.actions.file.copy import copy_file +from fairspec_dataset.actions.file.temp import get_temp_file_path + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def prefetch_files( + resource: Resource, + *, + max_bytes: int | None = None, +) -> list[str]: + paths = get_data_paths(resource) + if not paths: + return [] + return [prefetch_file(path, max_bytes=max_bytes) for path in paths] + + +def prefetch_file( + path: str, + *, + max_bytes: int | None = None, +) -> str: + if not get_is_remote_path(path): + return path + + new_path = get_temp_file_path() + copy_file(source_path=path, target_path=new_path, max_bytes=max_bytes) + return new_path diff --git a/dataset/fairspec_dataset/actions/file/prefetch_spec.py 
b/dataset/fairspec_dataset/actions/file/prefetch_spec.py new file mode 100644 index 0000000..a5b4bf8 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/prefetch_spec.py @@ -0,0 +1,38 @@ +import os + +import pytest +from fairspec_metadata import Resource + +from .prefetch import prefetch_files + +REMOTE_URL = "https://raw.githubusercontent.com/fairspec/fairspec-typescript/refs/heads/main/table/plugins/csv/actions/table/fixtures/table.csv" + + +@pytest.mark.vcr +class TestPrefetchFiles: + def test_prefetches_remote_file(self): + resource = Resource(data=REMOTE_URL) + result = prefetch_files(resource) + assert len(result) == 1 + path = result[0] + assert os.path.exists(path) + stats = os.stat(path) + assert stats.st_size == 27 + with open(path, encoding="utf-8") as f: + content = f.read() + assert "id,name" in content + + def test_prefetches_remote_file_with_max_bytes(self): + resource = Resource(data=REMOTE_URL) + max_bytes = 18 + result = prefetch_files(resource, max_bytes=max_bytes) + assert len(result) == 1 + path = result[0] + assert os.path.exists(path) + stats = os.stat(path) + assert stats.st_size == max_bytes + with open(path, encoding="utf-8") as f: + content = f.read() + assert "id,name" in content + assert "1,english" in content + assert "中文" not in content diff --git a/dataset/fairspec_dataset/actions/file/save.py b/dataset/fairspec_dataset/actions/file/save.py new file mode 100644 index 0000000..668667c --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/save.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from io import BytesIO + +from fairspec_dataset.actions.stream.save import save_file_stream + + +def save_file(path: str, data: bytes, *, overwrite: bool = False) -> None: + save_file_stream(BytesIO(data), path=path, overwrite=overwrite) diff --git a/dataset/fairspec_dataset/actions/file/temp.py b/dataset/fairspec_dataset/actions/file/temp.py new file mode 100644 index 0000000..c55ae9f --- /dev/null +++ 
b/dataset/fairspec_dataset/actions/file/temp.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import atexit +import os +import tempfile + + +def write_temp_file( + content: str | bytes, + *, + persist: bool = False, + filename: str | None = None, + format: str | None = None, +) -> str: + path = get_temp_file_path(persist=persist, filename=filename, format=format) + mode = "wb" if isinstance(content, bytes) else "w" + encoding = None if isinstance(content, bytes) else "utf-8" + newline = None if isinstance(content, bytes) else "" + with open(path, mode, encoding=encoding, newline=newline) as f: + f.write(content) + return path + + +def get_temp_file_path( + *, + persist: bool = False, + filename: str | None = None, + format: str | None = None, +) -> str: + if filename: + dir_path = tempfile.mkdtemp() + path = os.path.join(dir_path, filename) + else: + suffix = f".{format}" if format else "" + fd, path = tempfile.mkstemp(suffix=suffix) + os.close(fd) + os.unlink(path) + + if not persist: + atexit.register(_cleanup_file, path) + + return path + + +def _cleanup_file(path: str) -> None: + try: + os.unlink(path) + except OSError: + pass diff --git a/dataset/fairspec_dataset/actions/file/validate.py b/dataset/fairspec_dataset/actions/file/validate.py new file mode 100644 index 0000000..d01e529 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/validate.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + IntegrityError, + Report, + TextualError, + create_report, +) +from fairspec_metadata import FairspecError + +from .infer import infer_hash, infer_textual + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def validate_file(resource: Resource) -> Report: + errors: list[FairspecError] = [] + + if resource.textual: + actual_textual = infer_textual(resource) + + if not actual_textual: + errors.append(TextualError(type="file/textual")) + + integrity = 
resource.integrity + if integrity: + expected_hash = integrity.hash + actual_hash = infer_hash(resource, hash_type=integrity.type) + + if actual_hash != expected_hash: + errors.append( + IntegrityError( + type="file/integrity", + hashType=integrity.type, + expectedHash=expected_hash, + actualHash=actual_hash or "", + ) + ) + + return create_report(errors) diff --git a/dataset/fairspec_dataset/actions/file/validate_spec.py b/dataset/fairspec_dataset/actions/file/validate_spec.py new file mode 100644 index 0000000..1add806 --- /dev/null +++ b/dataset/fairspec_dataset/actions/file/validate_spec.py @@ -0,0 +1,215 @@ +from fairspec_metadata import Integrity, Resource +from fairspec_metadata import IntegrityType + +from fairspec_dataset.actions.file.temp import write_temp_file + +from .infer import infer_hash +from .validate import validate_file + + +class TestValidateFile: + def test_validates_textual_for_utf8(self): + path = write_temp_file("Hello, World!") + report = validate_file(Resource(data=path, textual=True)) + assert report.valid is True + assert report.errors == [] + + def test_validates_textual_for_ascii(self): + path = write_temp_file(b"Simple ASCII text only") + report = validate_file(Resource(data=path, textual=True)) + assert report.valid is True + assert report.errors == [] + + def test_returns_error_when_textual_expected_but_binary(self): + path = write_temp_file(bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00])) + report = validate_file(Resource(data=path, textual=True)) + assert report.valid is False + assert len(report.errors) == 1 + assert report.errors[0].type == "file/textual" + + def test_returns_error_when_textual_expected_but_latin1(self): + buffer = bytes( + [ + 0x43, + 0x61, + 0x66, + 0xE9, + 0x20, + 0x72, + 0xE9, + 0x73, + 0x75, + 0x6D, + 0xE9, + 0x20, + 0x6E, + 0x61, + 0xEF, + 0x76, + 0x65, + 0x20, + 0xE0, + 0x20, + 0x50, + 0x61, + 0x72, + 0x69, + 0x73, + 0x2E, + 0x20, + 0xC7, + 0x61, + 0x20, + 0x63, + 0x27, + 0x65, + 0x73, + 0x74, + 0x20, + 0x62, + 
0x6F, + 0x6E, + 0x21, + ] + ) + path = write_temp_file(buffer) + report = validate_file(Resource(data=path, textual=True)) + assert report.valid is False + assert len(report.errors) == 1 + assert report.errors[0].type == "file/textual" + + def test_validates_integrity_md5(self): + path = write_temp_file("Hello, World!") + actual_hash = infer_hash(Resource(data=path), hash_type="md5") + report = validate_file( + Resource( + data=path, + integrity=Integrity(type=IntegrityType.md5, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_returns_error_when_integrity_mismatch(self): + path = write_temp_file("Hello, World!") + actual_hash = infer_hash(Resource(data=path), hash_type="md5") + report = validate_file( + Resource( + data=path, + integrity=Integrity(type=IntegrityType.md5, hash="wronghash"), + ) + ) + assert report.valid is False + assert len(report.errors) == 1 + assert report.errors[0].type == "file/integrity" + assert report.errors[0].hashType == "md5" # type: ignore[union-attr] + assert report.errors[0].expectedHash == "wronghash" # type: ignore[union-attr] + assert report.errors[0].actualHash == actual_hash # type: ignore[union-attr] + + def test_validates_sha256_integrity(self): + path = write_temp_file("Hello, World!") + actual_hash = infer_hash(Resource(data=path), hash_type="sha256") + report = validate_file( + Resource( + data=path, + integrity=Integrity(type=IntegrityType.sha256, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_validates_sha1_integrity(self): + path = write_temp_file("Hello, World!") + actual_hash = infer_hash(Resource(data=path), hash_type="sha1") + report = validate_file( + Resource( + data=path, + integrity=Integrity(type=IntegrityType.sha1, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_validates_sha512_integrity(self): + path = write_temp_file("Hello, World!") + actual_hash = 
infer_hash(Resource(data=path), hash_type="sha512") + report = validate_file( + Resource( + data=path, + integrity=Integrity(type=IntegrityType.sha512, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_validates_both_textual_and_integrity(self): + path = write_temp_file("Hello, World!") + actual_hash = infer_hash(Resource(data=path), hash_type="md5") + report = validate_file( + Resource( + data=path, + textual=True, + integrity=Integrity(type=IntegrityType.md5, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_returns_multiple_errors(self): + path = write_temp_file(bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00])) + report = validate_file( + Resource( + data=path, + textual=True, + integrity=Integrity(type=IntegrityType.md5, hash="wronghash"), + ) + ) + assert report.valid is False + assert len(report.errors) == 2 + assert report.errors[0].type == "file/textual" + assert report.errors[1].type == "file/integrity" + + def test_returns_error_only_textual_mismatch(self): + path = write_temp_file(bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00])) + actual_hash = infer_hash(Resource(data=path), hash_type="md5") + report = validate_file( + Resource( + data=path, + textual=True, + integrity=Integrity(type=IntegrityType.md5, hash=actual_hash), + ) + ) + assert report.valid is False + assert len(report.errors) == 1 + assert report.errors[0].type == "file/textual" + + def test_returns_error_only_integrity_mismatch(self): + path = write_temp_file("Hello, World!") + report = validate_file( + Resource( + data=path, + textual=True, + integrity=Integrity(type=IntegrityType.md5, hash="wronghash"), + ) + ) + assert report.valid is False + assert len(report.errors) == 1 + assert report.errors[0].type == "file/integrity" + + def test_handles_empty_file(self): + path = write_temp_file("") + actual_hash = infer_hash(Resource(data=path), hash_type="sha256") + report = validate_file( + Resource( + data=path, + 
integrity=Integrity(type=IntegrityType.sha256, hash=actual_hash), + ) + ) + assert report.valid is True + assert report.errors == [] + + def test_validates_textual_with_special_characters(self): + path = write_temp_file("Special: é, ñ, ü, ö, à") + report = validate_file(Resource(data=path, textual=True)) + assert report.valid is True + assert report.errors == [] diff --git a/dataset/fairspec_dataset/actions/folder/create.py b/dataset/fairspec_dataset/actions/folder/create.py new file mode 100644 index 0000000..3527677 --- /dev/null +++ b/dataset/fairspec_dataset/actions/folder/create.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +import os + + +def create_folder(path: str) -> None: + os.makedirs(path, exist_ok=True) diff --git a/dataset/fairspec_dataset/actions/folder/create_spec.py b/dataset/fairspec_dataset/actions/folder/create_spec.py new file mode 100644 index 0000000..52652e6 --- /dev/null +++ b/dataset/fairspec_dataset/actions/folder/create_spec.py @@ -0,0 +1,51 @@ +import os + +from .create import create_folder + + +class TestCreateFolder: + def test_creates_simple_folder(self, tmp_path): + path = str(tmp_path / "test-folder") + create_folder(path) + assert os.path.isdir(path) + + def test_creates_nested_folders(self, tmp_path): + path = str(tmp_path / "parent" / "child" / "grandchild") + create_folder(path) + assert os.path.isdir(path) + assert os.path.isdir(str(tmp_path / "parent")) + assert os.path.isdir(str(tmp_path / "parent" / "child")) + + def test_no_error_when_folder_exists(self, tmp_path): + path = str(tmp_path / "existing") + create_folder(path) + create_folder(path) + assert os.path.isdir(path) + + def test_creates_deeply_nested_directories(self, tmp_path): + path = str(tmp_path / "l1" / "l2" / "l3" / "l4" / "l5") + create_folder(path) + assert os.path.isdir(path) + + def test_creates_folder_with_special_characters(self, tmp_path): + path = str(tmp_path / "folder-with_special.chars") + create_folder(path) + assert os.path.isdir(path) 
+ + def test_creates_sibling_folders(self, tmp_path): + for name in ("folder1", "folder2", "folder3"): + create_folder(str(tmp_path / name)) + for name in ("folder1", "folder2", "folder3"): + assert os.path.isdir(str(tmp_path / name)) + + def test_creates_complex_directory_structure(self, tmp_path): + dirs = [ + str(tmp_path / "project" / "src" / "components"), + str(tmp_path / "project" / "src" / "utils"), + str(tmp_path / "project" / "tests" / "unit"), + str(tmp_path / "project" / "tests" / "integration"), + ] + for d in dirs: + create_folder(d) + for d in dirs: + assert os.path.isdir(d) diff --git a/dataset/fairspec_dataset/actions/folder/temp.py b/dataset/fairspec_dataset/actions/folder/temp.py new file mode 100644 index 0000000..ac1c548 --- /dev/null +++ b/dataset/fairspec_dataset/actions/folder/temp.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import atexit +import shutil +import tempfile + + +def get_temp_folder_path(*, persist: bool = False) -> str: + path = tempfile.mkdtemp() + + if not persist: + atexit.register(_cleanup_dir, path) + + return path + + +def _cleanup_dir(path: str) -> None: + try: + shutil.rmtree(path) + except OSError: + pass diff --git a/dataset/fairspec_dataset/actions/resource/save.py b/dataset/fairspec_dataset/actions/resource/save.py new file mode 100644 index 0000000..9517791 --- /dev/null +++ b/dataset/fairspec_dataset/actions/resource/save.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Callable + +from fairspec_metadata import ( + denormalize_path, + get_file_name, + get_is_remote_path, +) + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + from fairspec_metadata import Resource + + +class SaveFileProps: + def __init__( + self, + *, + property_name: str, + property_index: int, + normalized_path: str, + denormalized_path: str, + ) -> None: + self.property_name = property_name + self.property_index = property_index + self.normalized_path = 
normalized_path + self.denormalized_path = denormalized_path + + +SaveFileCallback = Callable[[SaveFileProps], str] + + +def save_resource_files( + resource: Resource, + *, + save_file: SaveFileCallback, + basepath: str | None = None, + with_remote: bool = False, + without_folders: bool = False, +) -> Descriptor: + resource = resource.model_copy(deep=True) + dedup_indexes: dict[str, int] = {} + + def _save_file(path: str, name: str, index: int) -> str: + is_remote = get_is_remote_path(path) + + denormalized_path = denormalize_path(path, basepath=basepath) + normalized_path = path + + if is_remote: + if not with_remote: + return path + filename = get_file_name(path) + if not filename: + return path + denormalized_path = filename + elif without_folders: + denormalized_path = denormalized_path.replace("/", "-") + + dedup_index = dedup_indexes.get(denormalized_path, 0) + dedup_indexes[denormalized_path] = dedup_index + 1 + + if dedup_index: + denormalized_path = re.sub( + r"^(.*?)([^/]+?)(\.[^/]+(?:\.[^/]+)*)$", + rf"\1\2-{dedup_index}\3", + denormalized_path, + ) + + return save_file( + SaveFileProps( + property_name=name, + property_index=index, + normalized_path=normalized_path, + denormalized_path=denormalized_path, + ) + ) + + if isinstance(resource.data, str): + resource.data = _save_file(resource.data, "data", 0) + + if isinstance(resource.data, list): + for i, item in enumerate(resource.data): + if isinstance(item, str): + resource.data[i] = _save_file(item, "data", i) + + for name in ("dataSchema", "tableSchema"): + prop = getattr(resource, name, None) + if isinstance(prop, str): + setattr(resource, name, _save_file(prop, name, 0)) + + return resource.model_dump(by_alias=True, exclude_none=True) diff --git a/dataset/fairspec_dataset/actions/resource/save_spec.py b/dataset/fairspec_dataset/actions/resource/save_spec.py new file mode 100644 index 0000000..96be289 --- /dev/null +++ b/dataset/fairspec_dataset/actions/resource/save_spec.py @@ -0,0 +1,137 @@ +import 
pytest +from fairspec_metadata import Resource + +from .save import SaveFileProps, save_resource_files + + +def _identity_save(props: SaveFileProps) -> str: + return props.denormalized_path + + +class TestSaveResourceFiles: + @pytest.mark.parametrize( + "description, basepath, resource, expected, with_remote, without_folders", + [ + ( + "local path", + "data", + { + "data": "data/table.csv", + "dataSchema": "data/data-schema.json", + "tableSchema": "data/table-schema.json", + }, + { + "data": "table.csv", + "dataSchema": "data-schema.json", + "tableSchema": "table-schema.json", + }, + False, + False, + ), + ( + "local paths", + "data", + {"data": ["data/table1.csv", "data/table2.csv"]}, + {"data": ["table1.csv", "table2.csv"]}, + False, + False, + ), + ( + "local path and remote path", + "data", + { + "data": "data/table.csv", + "tableSchema": "https://example.com/schema.json", + }, + { + "data": "table.csv", + "tableSchema": "https://example.com/schema.json", + }, + False, + False, + ), + ( + "local path and remote path using with_remote", + "data", + { + "data": "data/table.csv", + "tableSchema": "https://example.com/schema.json", + }, + { + "data": "table.csv", + "tableSchema": "schema.json", + }, + True, + False, + ), + ( + "remote paths with same filename using with_remote", + "data", + { + "data": [ + "http://example1.com/table.csv", + "http://example2.com/table.csv", + "http://example3.com/table.csv", + "http://example4.com/table.csv.zip", + "http://example5.com/table.csv.zip", + ], + }, + { + "data": [ + "table.csv", + "table-1.csv", + "table-2.csv", + "table.csv.zip", + "table-1.csv.zip", + ], + }, + True, + False, + ), + ( + "local paths in different folders", + "data", + { + "data": "data/folder1/table.csv", + "tableSchema": "data/folder2/schema.json", + }, + { + "data": "folder1/table.csv", + "tableSchema": "folder2/schema.json", + }, + False, + False, + ), + ( + "local paths in different folders using without_folders", + "data", + { + "data": 
"data/folder1/table.csv", + "tableSchema": "data/folder2/schema.json", + }, + { + "data": "folder1-table.csv", + "tableSchema": "folder2-schema.json", + }, + False, + True, + ), + ], + ) + def test_save_resource_files( + self, + description, + basepath, + resource, + expected, + with_remote, + without_folders, + ): + result = save_resource_files( + Resource(**resource), + basepath=basepath, + with_remote=with_remote, + without_folders=without_folders, + save_file=_identity_save, + ) + assert result == expected diff --git a/dataset/fairspec_dataset/actions/stream/concat.py b/dataset/fairspec_dataset/actions/stream/concat.py new file mode 100644 index 0000000..3e8e899 --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/concat.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from io import BytesIO +from typing import BinaryIO + + +def concat_file_streams(streams: list[BinaryIO]) -> BinaryIO: + parts: list[bytes] = [] + for stream in streams: + parts.append(stream.read()) + return BytesIO(b"".join(parts)) diff --git a/dataset/fairspec_dataset/actions/stream/concat_spec.py b/dataset/fairspec_dataset/actions/stream/concat_spec.py new file mode 100644 index 0000000..334e372 --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/concat_spec.py @@ -0,0 +1,70 @@ +from io import BytesIO + +from .concat import concat_file_streams + + +class TestConcatFileStreams: + def test_concatenates_multiple_streams_in_order(self): + stream1 = BytesIO(b"Hello, ") + stream2 = BytesIO(b"World") + stream3 = BytesIO(b"!") + + result = concat_file_streams([stream1, stream2, stream3]) + assert result.read() == b"Hello, World!" 
+ + def test_handles_single_stream(self): + stream = BytesIO(b"Single stream content") + + result = concat_file_streams([stream]) + assert result.read() == b"Single stream content" + + def test_handles_empty_array(self): + result = concat_file_streams([]) + assert result.read() == b"" + + def test_handles_streams_with_empty_content(self): + stream1 = BytesIO(b"") + stream2 = BytesIO(b"Content") + stream3 = BytesIO(b"") + + result = concat_file_streams([stream1, stream2, stream3]) + assert result.read() == b"Content" + + def test_concatenates_streams_with_multiple_chunks(self): + stream1 = BytesIO(b"ABC") + stream2 = BytesIO(b"DEF") + + result = concat_file_streams([stream1, stream2]) + assert result.read() == b"ABCDEF" + + def test_handles_binary_data(self): + stream1 = BytesIO(bytes([0x00, 0x01, 0x02])) + stream2 = BytesIO(bytes([0x03, 0x04, 0x05])) + + result = concat_file_streams([stream1, stream2]) + assert result.read() == bytes([0x00, 0x01, 0x02, 0x03, 0x04, 0x05]) + + def test_handles_large_streams(self): + content1 = b"A" * 5000 + content2 = b"B" * 5000 + stream1 = BytesIO(content1) + stream2 = BytesIO(content2) + + result = concat_file_streams([stream1, stream2]) + data = result.read() + assert len(data) == 10000 + assert data == content1 + content2 + + def test_preserves_unicode_characters(self): + stream1 = BytesIO("Hello 世界".encode()) + stream2 = BytesIO(" مرحبا".encode()) + stream3 = BytesIO(" 🌍".encode()) + + result = concat_file_streams([stream1, stream2, stream3]) + assert result.read().decode() == "Hello 世界 مرحبا 🌍" + + def test_maintains_stream_order_with_many_streams(self): + streams = [BytesIO(str(i).encode()) for i in range(10)] + + result = concat_file_streams(streams) + assert result.read() == b"0123456789" diff --git a/dataset/fairspec_dataset/actions/stream/load.py b/dataset/fairspec_dataset/actions/stream/load.py new file mode 100644 index 0000000..779d214 --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/load.py @@ -0,0 +1,56 @@ 
+from __future__ import annotations + +import urllib.request +from io import BytesIO +from typing import BinaryIO + +from fairspec_metadata import ResourceDataPath, get_is_remote_path + + +def load_file_stream( + data_path: ResourceDataPath, + *, + index: int = 0, + max_bytes: int | None = None, +) -> BinaryIO: + paths = data_path if isinstance(data_path, list) else [data_path] + + if index >= len(paths) or index < 0: + raise ValueError( + f"Cannot stream resource {paths[index] if index < len(paths) else None} at index {index}" + ) + + path = paths[index] + is_remote = get_is_remote_path(path) + + if is_remote: + return _load_remote_file_stream(path, max_bytes=max_bytes) + + return _load_local_file_stream(path, max_bytes=max_bytes) + + +def _load_local_file_stream( + path: str, + *, + max_bytes: int | None = None, +) -> BinaryIO: + if max_bytes is not None: + with open(path, "rb") as f: + data = f.read(max_bytes) + return BytesIO(data) + + return open(path, "rb") + + +def _load_remote_file_stream( + path: str, + *, + max_bytes: int | None = None, +) -> BinaryIO: + with urllib.request.urlopen(path) as response: + if max_bytes is not None: + data = response.read(max_bytes) + else: + data = response.read() + + return BytesIO(data) diff --git a/dataset/fairspec_dataset/actions/stream/load_spec.py b/dataset/fairspec_dataset/actions/stream/load_spec.py new file mode 100644 index 0000000..7abc75b --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/load_spec.py @@ -0,0 +1,69 @@ +import pytest + +from fairspec_dataset.actions.file.temp import write_temp_file + +from .load import load_file_stream + + +class TestLoadFileStream: + def test_loads_stream_from_single_local_file(self): + path = write_temp_file("Hello, World!") + stream = load_file_stream(path) + assert stream.read() == b"Hello, World!" 
+ + def test_loads_stream_from_array_using_default_index(self): + file1 = write_temp_file("First file content") + file2 = write_temp_file("Second file content") + stream = load_file_stream([file1, file2]) + assert stream.read() == b"First file content" + + def test_loads_stream_from_array_using_specified_index(self): + file1 = write_temp_file("First file content") + file2 = write_temp_file("Second file content") + stream = load_file_stream([file1, file2], index=1) + assert stream.read() == b"Second file content" + + def test_limits_stream_to_max_bytes(self): + path = write_temp_file("This is a long content that should be truncated") + stream = load_file_stream(path, max_bytes=10) + data = stream.read() + assert data == b"This is a " + assert len(data) == 10 + + def test_raises_error_for_invalid_index(self): + path = write_temp_file("content") + with pytest.raises(ValueError, match="Cannot stream resource"): + load_file_stream([path], index=5) + + def test_raises_error_for_empty_array(self): + with pytest.raises(ValueError, match="Cannot stream resource"): + load_file_stream([], index=0) + + def test_handles_large_files(self): + content = "A" * 10000 + path = write_temp_file(content) + stream = load_file_stream(path) + data = stream.read() + assert len(data) == 10000 + assert data == content.encode() + + def test_handles_binary_content(self): + binary_data = bytes([0x00, 0x01, 0x02, 0x03, 0xFF]) + path = write_temp_file(binary_data) + stream = load_file_stream(path) + assert stream.read() == binary_data + + def test_handles_empty_files(self): + path = write_temp_file("") + stream = load_file_stream(path) + assert stream.read() == b"" + + def test_limits_bytes_correctly(self): + path = write_temp_file("0123456789ABCDEFGHIJ") + stream = load_file_stream(path, max_bytes=5) + assert stream.read() == b"01234" + + def test_handles_max_bytes_larger_than_file(self): + path = write_temp_file("Short") + stream = load_file_stream(path, max_bytes=1000) + assert stream.read() == 
b"Short" diff --git a/dataset/fairspec_dataset/actions/stream/save.py b/dataset/fairspec_dataset/actions/stream/save.py new file mode 100644 index 0000000..fe5fa8c --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/save.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import os +import shutil +from typing import BinaryIO + + +def save_file_stream( + stream: BinaryIO, + *, + path: str, + overwrite: bool = False, +) -> None: + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + + if not overwrite and os.path.exists(path): + raise FileExistsError(f'Path "{path}" already exists') + + with open(path, "wb") as f: + shutil.copyfileobj(stream, f) diff --git a/dataset/fairspec_dataset/actions/stream/save_spec.py b/dataset/fairspec_dataset/actions/stream/save_spec.py new file mode 100644 index 0000000..e6e0565 --- /dev/null +++ b/dataset/fairspec_dataset/actions/stream/save_spec.py @@ -0,0 +1,73 @@ +import os +from io import BytesIO + +import pytest + +from .save import save_file_stream + + +class TestSaveFileStream: + def test_saves_stream_to_file(self, tmp_path): + path = str(tmp_path / "test.txt") + save_file_stream(BytesIO(b"Hello, World!"), path=path) + assert os.path.exists(path) + with open(path, "rb") as f: + assert f.read() == b"Hello, World!" + + def test_saves_stream_with_multiple_chunks(self, tmp_path): + path = str(tmp_path / "chunks.txt") + save_file_stream(BytesIO(b"Hello, World!"), path=path) + with open(path, "rb") as f: + assert f.read() == b"Hello, World!" 
+ + def test_creates_nested_directories(self, tmp_path): + path = str(tmp_path / "nested" / "dir" / "file.txt") + save_file_stream(BytesIO(b"Nested content"), path=path) + with open(path, "rb") as f: + assert f.read() == b"Nested content" + + def test_raises_when_file_exists_and_overwrite_false(self, tmp_path): + path = str(tmp_path / "existing.txt") + save_file_stream(BytesIO(b"Initial content"), path=path) + with pytest.raises(FileExistsError): + save_file_stream(BytesIO(b"New content"), path=path, overwrite=False) + + def test_raises_when_file_exists_and_overwrite_not_specified(self, tmp_path): + path = str(tmp_path / "existing2.txt") + save_file_stream(BytesIO(b"Initial content"), path=path) + with pytest.raises(FileExistsError): + save_file_stream(BytesIO(b"New content"), path=path) + + def test_overwrites_when_overwrite_true(self, tmp_path): + path = str(tmp_path / "overwrite.txt") + save_file_stream(BytesIO(b"Initial content"), path=path) + save_file_stream(BytesIO(b"New content"), path=path, overwrite=True) + with open(path, "rb") as f: + assert f.read() == b"New content" + + def test_saves_binary_data(self, tmp_path): + binary_data = bytes([0x00, 0x01, 0x02, 0x03, 0xFF]) + path = str(tmp_path / "binary.bin") + save_file_stream(BytesIO(binary_data), path=path) + with open(path, "rb") as f: + assert f.read() == binary_data + + def test_saves_empty_stream(self, tmp_path): + path = str(tmp_path / "empty.txt") + save_file_stream(BytesIO(b""), path=path) + with open(path, "rb") as f: + assert f.read() == b"" + + def test_saves_large_stream(self, tmp_path): + content = b"A" * 10000 + path = str(tmp_path / "large.txt") + save_file_stream(BytesIO(content), path=path) + with open(path, "rb") as f: + assert f.read() == content + + def test_saves_unicode_characters(self, tmp_path): + content = "Unicode: 你好世界 🌍 مرحبا".encode() + path = str(tmp_path / "unicode.txt") + save_file_stream(BytesIO(content), path=path) + with open(path, "rb") as f: + assert f.read() == 
content diff --git a/dataset/fairspec_dataset/conftest.py b/dataset/fairspec_dataset/conftest.py new file mode 100644 index 0000000..7736f83 --- /dev/null +++ b/dataset/fairspec_dataset/conftest.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import os + +import pytest + + +@pytest.fixture(scope="module") +def vcr_cassette_dir(request): + return os.path.join(os.path.dirname(request.fspath), "fixtures", "generated") diff --git a/dataset/fairspec_dataset/models/__init__.py b/dataset/fairspec_dataset/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataset/fairspec_dataset/models/dataset.py b/dataset/fairspec_dataset/models/dataset.py new file mode 100644 index 0000000..9763fab --- /dev/null +++ b/dataset/fairspec_dataset/models/dataset.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import Required, TypedDict + +from fairspec_metadata.models.base import FairspecModel + + +class SaveDatasetOptions(TypedDict, total=False): + target: Required[str] + with_remote: bool + + +class SaveDatasetResult(FairspecModel): + path: str | None = None diff --git a/dataset/fairspec_dataset/models/file.py b/dataset/fairspec_dataset/models/file.py new file mode 100644 index 0000000..86f436d --- /dev/null +++ b/dataset/fairspec_dataset/models/file.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from fairspec_metadata import Integrity + + +class FileDescription(FairspecModel): + bytes: int + textual: bool + integrity: Integrity | None diff --git a/dataset/fairspec_dataset/models/file_dialect.py b/dataset/fairspec_dataset/models/file_dialect.py new file mode 100644 index 0000000..9c7ce9b --- /dev/null +++ b/dataset/fairspec_dataset/models/file_dialect.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from typing import TypedDict + + +class InferFileDialectOptions(TypedDict, total=False): + sample_bytes: int diff --git a/dataset/fairspec_dataset/plugin.py 
b/dataset/fairspec_dataset/plugin.py new file mode 100644 index 0000000..06ac479 --- /dev/null +++ b/dataset/fairspec_dataset/plugin.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata.plugin import MetadataPlugin + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + from fairspec_metadata import FileDialect + from fairspec_metadata import Resource + + from .models.dataset import SaveDatasetOptions, SaveDatasetResult + from .models.file_dialect import InferFileDialectOptions + + +class DatasetPlugin(MetadataPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + return None + + def save_dataset( + self, dataset: Dataset, **options: Unpack[SaveDatasetOptions] + ) -> SaveDatasetResult | None: + return None + + def infer_file_dialect( + self, + resource: Resource, + **options: Unpack[InferFileDialectOptions], + ) -> FileDialect | None: + return None diff --git a/dataset/fairspec_dataset/plugins/__init__.py b/dataset/fairspec_dataset/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataset/fairspec_dataset/plugins/ckan/__init__.py b/dataset/fairspec_dataset/plugins/ckan/__init__.py new file mode 100644 index 0000000..2fd3611 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/__init__.py @@ -0,0 +1,9 @@ +from .actions.dataset.load import load_dataset_from_ckan +from .actions.dataset.save import save_dataset_to_ckan +from .plugin import CkanPlugin + +__all__ = [ + "CkanPlugin", + "load_dataset_from_ckan", + "save_dataset_to_ckan", +] diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/__init__.py b/dataset/fairspec_dataset/plugins/ckan/actions/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/__init__.py 
b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/fixtures/ckan-dataset.json b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/fixtures/ckan-dataset.json new file mode 100644 index 0000000..87a8b44 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/fixtures/ckan-dataset.json @@ -0,0 +1,308 @@ +{ + "author": "Test Author", + "author_email": "test@email.com", + "creator_user_id": "47c7f1b1-0ef5-4d7b-b43c-811c51c9e349", + "id": "c322307a-b871-44fe-a602-32ee8437ff04", + "isopen": true, + "license_id": "cc-by", + "license_title": "Creative Commons Attribution", + "license_url": "http://www.opendefinition.org/licenses/cc-by", + "maintainer": "Test Maintainer", + "maintainer_email": "test@email.com", + "metadata_created": "2021-04-09T11:39:37.657233", + "metadata_modified": "2022-05-20T09:20:43.998956", + "name": "sample-dataset-1", + "notes": "A CKAN Dataset is a collection of data resources (such as files), together with a description and other information (what is known as metadata), at a fixed URL. 
\r\n\r\n", + "num_resources": 9, + "num_tags": 8, + "organization": { + "id": "1fa89238-ee96-4439-a885-22d15244d070", + "name": "sample-organization", + "title": "Sample Organization", + "type": "organization", + "description": "This is a sample organization.", + "image_url": "2022-05-20-084702.929838siurana.jpg", + "created": "2021-04-09T14:27:17.753798", + "is_organization": true, + "approval_status": "approved", + "state": "active" + }, + "owner_org": "1fa89238-ee96-4439-a885-22d15244d070", + "private": false, + "state": "active", + "title": "Sample Dataset", + "type": "dataset", + "url": "", + "version": "1.0", + "groups": [ + { + "description": "", + "display_name": "Test Group", + "id": "5d423f6b-137e-4d15-a156-868763fa7a64", + "image_display_url": "https://demo.ckan.org/uploads/group/2021-04-21-153504.571229064c7c.png", + "name": "test-group", + "title": "Test Group" + } + ], + "resources": [ + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-09T14:31:09.032858", + "datastore_active": true, + "description": "This is a sample resource added via url.", + "format": "CSV", + "hash": "", + "id": "e687245d-7835-44b0-8ed3-0827de123895", + "last_modified": null, + "metadata_modified": "2021-04-09T14:31:09.021596", + "mimetype": "text/csv", + "mimetype_inner": null, + "name": "sample-linked.csv", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 0, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://raw.githubusercontent.com/datopian/CKAN_Demo_Datasets/main/resources/org1_sample.csv", + "url_type": null + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-09T14:31:45.092631", + "datastore_active": true, + "description": "Sample csv (uploaded).", + "format": "CSV", + "hash": "", + "id": "b53c9e72-6b59-4cda-8c0c-7d6a51dad12a", + "last_modified": "2021-04-09T16:13:57.353205", + "metadata_modified": "2021-04-09T16:13:57.367140", + "mimetype": "application/csv", + 
"mimetype_inner": null, + "name": "sample.csv", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 1, + "resource_type": null, + "size": 6731, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/b53c9e72-6b59-4cda-8c0c-7d6a51dad12a/download/sample.csv", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-09T16:21:17.140402", + "datastore_active": true, + "description": "Sample views for csv.", + "format": "CSV", + "hash": "", + "id": "9ce6650b-6ff0-4a52-9b10-09cfc29bbd7e", + "last_modified": "2021-04-09T16:21:17.106693", + "metadata_modified": "2021-04-13T12:41:06.751746", + "mimetype": null, + "mimetype_inner": null, + "name": "views.csv", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 2, + "resource_type": null, + "size": 32773, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/9ce6650b-6ff0-4a52-9b10-09cfc29bbd7e/download/co2-mm-mlo_csv.csv", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-09T14:49:24.711541", + "datastore_active": false, + "description": "Sample pdf file.", + "format": "PDF", + "hash": "", + "id": "8aa53505-3b7f-4b9c-9b54-cf674eadc3f1", + "last_modified": "2021-04-09T16:11:46.261373", + "metadata_modified": "2021-04-13T12:39:41.141419", + "mimetype": null, + "mimetype_inner": null, + "name": "sample.pdf", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 3, + "resource_type": null, + "size": 712352, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/8aa53505-3b7f-4b9c-9b54-cf674eadc3f1/download/sample.pdf", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-09T16:17:05.189302", + "datastore_active": false, + "description": "Sample txt file.", + "format": 
"TXT", + "hash": "", + "id": "0185907b-2812-437f-9c64-eae24771ef5f", + "last_modified": "2021-04-09T16:17:05.136426", + "metadata_modified": "2021-04-13T12:39:24.524530", + "mimetype": null, + "mimetype_inner": null, + "name": "sample.txt", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 4, + "resource_type": null, + "size": 85, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/0185907b-2812-437f-9c64-eae24771ef5f/download/sample.txt", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-13T12:19:02.178513", + "datastore_active": false, + "description": "Sample GeoJSON resource for the list of countries.\r\n\r\nResource taken from https://openlayers.org/ \r\nLicensed under the 2-Clause BSD (https://www.tldrlegal.com/l/freebsd)", + "format": "GeoJSON", + "hash": "", + "id": "ecd4a62d-998b-46e4-8a64-cadac2125c64", + "last_modified": "2021-04-13T12:19:01.921374", + "metadata_modified": "2021-04-13T12:29:29.067536", + "mimetype": null, + "mimetype_inner": null, + "name": "sample.geojson", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 5, + "resource_type": null, + "size": 255943, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/ecd4a62d-998b-46e4-8a64-cadac2125c64/download/countries.geojson", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2021-04-13T12:32:17.823512", + "datastore_active": false, + "description": "Sample kml file for Earthquakes of magnitude 5 in 2012.\r\n\r\nResource taken from https://openlayers.org/ \r\nLicensed under the 2-Clause BSD (https://www.tldrlegal.com/l/freebsd)\r\n", + "format": "KML", + "hash": "", + "id": "048333ab-9608-42dc-901b-a7dd9fca3dda", + "last_modified": "2021-04-13T12:32:17.769578", + "metadata_modified": "2021-04-13T12:40:09.731123", + "mimetype": null, + 
"mimetype_inner": null, + "name": "sample.kml", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 6, + "resource_type": null, + "size": 474000, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/048333ab-9608-42dc-901b-a7dd9fca3dda/download/2012_earthquakes_mag5.kml", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2022-04-11T18:35:24.435997", + "datastore_active": false, + "description": "", + "format": "JPEG", + "hash": "", + "id": "b6c22c1d-e789-490d-b935-989093bbb173", + "last_modified": "2022-04-11T18:35:24.381740", + "metadata_modified": "2022-05-20T09:03:31.574365", + "mimetype": "image/png", + "mimetype_inner": null, + "name": "avoid-crowds-when-buying-materials-social-media-post.jpeg", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 7, + "resource_type": null, + "size": 444695, + "state": "active", + "url": "https://demo.ckan.org/dataset/c322307a-b871-44fe-a602-32ee8437ff04/resource/b6c22c1d-e789-490d-b935-989093bbb173/download/avoid-crowds-when-buying-materials-social-media-post.jpeg", + "url_type": "upload" + }, + { + "cache_last_updated": null, + "cache_url": null, + "created": "2022-05-20T09:03:31.617635", + "datastore_active": false, + "description": "", + "format": "WMS", + "hash": "", + "id": "664e5e2c-bd7d-4972-a245-a747f7d61cc9", + "last_modified": null, + "metadata_modified": "2022-05-20T09:03:42.450846", + "mimetype": null, + "mimetype_inner": null, + "name": "Sample WMS", + "package_id": "c322307a-b871-44fe-a602-32ee8437ff04", + "position": 8, + "resource_type": null, + "size": null, + "state": "active", + "url": "https://geoserveis.icgc.cat/icc_mapesbase/wms/service?", + "url_type": null + } + ], + "tags": [ + { + "display_name": "csv", + "id": "b5e651dd-8f42-445c-b9c4-2f09a3268427", + "name": "csv", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "economy", + "id": 
"0c4f9ad5-a372-4bda-a59b-e560cf264b0f", + "name": "economy", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "geojson", + "id": "a91d1c52-bd37-40fc-a1f2-6610ac7f39f3", + "name": "geojson", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "kml", + "id": "f29e8c38-be23-430b-bae0-7898f59d0089", + "name": "kml", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "pdf", + "id": "a4ba9601-bfa5-4322-8a62-38a83f2348c1", + "name": "pdf", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "sample", + "id": "1e6aa0c9-0dfc-448a-8c13-b5fd8ab2fefd", + "name": "sample", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "txt", + "id": "19e5f926-d769-49cf-a82c-80870baa3528", + "name": "txt", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "wms", + "id": "1279dad1-9397-48e1-94b8-0999bac75aee", + "name": "wms", + "state": "active", + "vocabulary_id": null + } + ], + "extras": [], + "relationships_as_subject": [], + "relationships_as_object": [] +} diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan.py new file mode 100644 index 0000000..633ed42 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + ContributorType, + CreatorNameType, + DateType, + DescriptionType, +) +from fairspec_metadata import Contributor +from fairspec_metadata import Creator +from fairspec_metadata import DataciteDate +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Rights +from fairspec_metadata import Subject +from fairspec_metadata import Title +from fairspec_metadata import Dataset + +from fairspec_dataset.plugins.ckan.actions.resource.from_ckan import ( + convert_resource_from_ckan, +) + +if 
TYPE_CHECKING: + from fairspec_dataset.plugins.ckan.models.dataset import CkanDataset + + +def convert_dataset_from_ckan(ckan_dataset: CkanDataset) -> Dataset: + titles = [Title(title=ckan_dataset.title)] if ckan_dataset.title else None + + descriptions = ( + [ + DataciteDescription( + description=ckan_dataset.notes, descriptionType=DescriptionType.Abstract + ) + ] + if ckan_dataset.notes + else None + ) + + version = ckan_dataset.version + + resources = ckan_dataset.resources or [] + resource_list = ( + [convert_resource_from_ckan(r) for r in resources] if resources else [] + ) + + rights_list = None + if ckan_dataset.license_id or ckan_dataset.license_title: + rights_list = [ + Rights( + rights=ckan_dataset.license_title or ckan_dataset.license_id or "", + rightsUri=ckan_dataset.license_url, + rightsIdentifier=ckan_dataset.license_id, + ) + ] + + creators = ( + [Creator(name=ckan_dataset.author, nameType=CreatorNameType.Personal)] + if ckan_dataset.author + else None + ) + + contributors = None + if ckan_dataset.maintainer: + contributors = [ + Contributor( + name=ckan_dataset.maintainer, + nameType=CreatorNameType.Personal, + contributorType=ContributorType.ContactPerson, + ) + ] + + tags = ckan_dataset.tags or [] + subjects = [Subject(subject=tag.name or "") for tag in tags] if tags else None + + dates: list[DataciteDate] = [] + if ckan_dataset.metadata_created: + dates.append( + DataciteDate(date=ckan_dataset.metadata_created, dateType=DateType.Created) + ) + if ckan_dataset.metadata_modified: + dates.append( + DataciteDate(date=ckan_dataset.metadata_modified, dateType=DateType.Updated) + ) + + return Dataset( + titles=titles, + descriptions=descriptions, + version=version, + resources=resource_list, + rightsList=rights_list, + creators=creators, + contributors=contributors, + subjects=subjects, + dates=dates if dates else None, + ) diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan_spec.py 
b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan_spec.py new file mode 100644 index 0000000..7588592 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan_spec.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import json +import os +import re + +from fairspec_dataset.plugins.ckan.models.dataset import CkanDataset +from .from_ckan import convert_dataset_from_ckan + + +def _load_fixture() -> CkanDataset: + path = os.path.join(os.path.dirname(__file__), "fixtures", "ckan-dataset.json") + with open(path) as f: + return CkanDataset(**json.load(f)) + + +class TestConvertDatasetFromCkan: + def test_converts_ckan_dataset_to_fairspec_dataset(self): + ckan_dataset = _load_fixture() + + result = convert_dataset_from_ckan(ckan_dataset) + + assert result.titles is not None + assert len(result.titles) == 1 + assert result.titles[0].title == ckan_dataset.title + + assert result.descriptions is not None + assert len(result.descriptions) == 1 + assert result.descriptions[0].description == ckan_dataset.notes + assert result.descriptions[0].descriptionType == "Abstract" + + assert result.version == ckan_dataset.version + + assert result.dates is not None + assert len(result.dates) == 2 + created_date = next(d for d in result.dates if d.dateType == "Created") + assert created_date.date == ckan_dataset.metadata_created + updated_date = next(d for d in result.dates if d.dateType == "Updated") + assert updated_date.date == ckan_dataset.metadata_modified + + assert result.rightsList is not None + assert len(result.rightsList) == 1 + rights = result.rightsList[0] + assert rights.rights == ckan_dataset.license_title + assert rights.rightsUri == ckan_dataset.license_url + assert rights.rightsIdentifier == ckan_dataset.license_id + + assert result.creators is not None + assert len(result.creators) == 1 + assert result.creators[0].name == ckan_dataset.author + assert result.creators[0].nameType == "Personal" + + assert 
result.contributors is not None + assert len(result.contributors) == 1 + assert result.contributors[0].name == ckan_dataset.maintainer + assert result.contributors[0].nameType == "Personal" + assert result.contributors[0].contributorType == "ContactPerson" + + assert result.subjects is not None + assert ckan_dataset.tags is not None + assert len(result.subjects) == len(ckan_dataset.tags) + assert [s.subject for s in result.subjects] == [ + tag.name for tag in ckan_dataset.tags + ] + + assert result.resources is not None + assert ckan_dataset.resources is not None + assert len(result.resources) == len(ckan_dataset.resources) + + first_ckan_resource = ckan_dataset.resources[0] + first_resource = result.resources[0] + assert first_resource.data == first_ckan_resource.url + assert first_resource.name is not None + assert re.match(r"^sample[-_]linked[-_]csv$", first_resource.name) + assert first_resource.descriptions is not None + assert ( + first_resource.descriptions[0].description + == first_ckan_resource.description + ) + + def test_handles_empty_resources_array(self): + ckan_dataset = _load_fixture().model_copy(update={"resources": []}) + + result = convert_dataset_from_ckan(ckan_dataset) + + assert result.resources == [] + + def test_handles_undefined_optional_properties(self): + ckan_dataset = CkanDataset(resources=[], tags=[], id="test", name="test") + + result = convert_dataset_from_ckan(ckan_dataset) + + assert result.titles is None + assert result.descriptions is None + assert result.version is None + assert result.dates is None + assert result.rightsList is None + assert result.creators is None + assert result.contributors is None + assert result.subjects is None + assert result.resources == [] diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/load.py b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/load.py new file mode 100644 index 0000000..a0787b9 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/load.py @@ 
-0,0 +1,89 @@ +from __future__ import annotations + +import urllib.parse +from typing import TYPE_CHECKING + +from fairspec_dataset.actions.dataset.merge import merge_datasets + +from fairspec_dataset.plugins.ckan.models.dataset import CkanDataset +from fairspec_dataset.plugins.ckan.services.ckan import make_ckan_api_request +from .from_ckan import convert_dataset_from_ckan + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +def load_dataset_from_ckan( + dataset_url: str, + *, + api_key: str | None = None, +) -> Descriptor: + dataset_id = _extract_dataset_id(dataset_url) + if not dataset_id: + raise Exception(f"Failed to extract dataset ID from URL: {dataset_url}") + + ckan_dict = make_ckan_api_request( + ckan_url=dataset_url, + action="package_show", + payload={"id": dataset_id}, + api_key=api_key, + ) + + for resource in ckan_dict.get("resources", []): + resource_id = resource.get("id") + if resource.get("format", "").upper() in ("CSV", "XLS", "XLSX"): + schema = _load_ckan_schema( + dataset_url=dataset_url, + resource_id=resource_id, + api_key=api_key, + ) + if schema: + resource["schema"] = schema + + ckan_dataset = CkanDataset(**ckan_dict) + system_dataset = convert_dataset_from_ckan(ckan_dataset) + user_dataset_path: str | None = None + for resource in system_dataset.resources or []: + custom = resource.unstable_customMetadata or {} + if custom.get("ckanKey") == "dataset.json": + user_dataset_path = custom.get("ckanUrl") + break + + dataset = merge_datasets( + system_dataset=system_dataset, + user_dataset_path=user_dataset_path, + ) + + for resource in dataset.resources or []: + resource.unstable_customMetadata = None + + return dataset.model_dump(by_alias=True, exclude_none=True) + + +def _extract_dataset_id(dataset_url: str) -> str | None: + parsed = urllib.parse.urlparse(dataset_url) + parts = [p for p in parsed.path.split("/") if p] + return parts[-1] if parts else None + + +def _load_ckan_schema( + *, + dataset_url: str, + resource_id: 
str, + api_key: str | None = None, +) -> dict | None: + try: + result = make_ckan_api_request( + ckan_url=dataset_url, + action="datastore_search", + payload={"resource_id": resource_id, "limit": 0}, + api_key=api_key, + ) + fields = [ + f + for f in result.get("fields", []) + if f.get("id") not in ("_id", "_full_text") + ] + return {"fields": fields} + except Exception: + return None diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/save.py b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/save.py new file mode 100644 index 0000000..236c07f --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/save.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import urllib.parse +from collections.abc import Callable +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + denormalize_dataset, + get_file_extension, + stringify_descriptor, +) + +from fairspec_dataset.actions.dataset.basepath import get_dataset_basepath +from fairspec_dataset.actions.resource.save import SaveFileProps, save_resource_files +from fairspec_dataset.actions.stream.load import load_file_stream + +from fairspec_dataset.plugins.ckan.actions.resource.to_ckan import ( + convert_resource_to_ckan, +) +from fairspec_dataset.plugins.ckan.services.ckan import make_ckan_api_request +from .to_ckan import convert_dataset_to_ckan + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + from fairspec_metadata import Resource + + +def save_dataset_to_ckan( + dataset: Dataset, + *, + api_key: str, + ckan_url: str, + owner_org: str, + dataset_name: str, +) -> dict: + basepath = get_dataset_basepath(dataset) + ckan_dataset = convert_dataset_to_ckan(dataset) + + payload = { + **ckan_dataset.model_dump(by_alias=True, exclude_none=True), + "name": dataset_name, + "owner_org": owner_org, + "resources": [], + } + + result = make_ckan_api_request( + action="package_create", + payload=payload, + 
ckan_url=ckan_url, + api_key=api_key, + ) + + parsed = urllib.parse.urlparse(ckan_url) + dataset_url = f"{parsed.scheme}://{parsed.netloc}/dataset/{result['name']}" + + resource_descriptors: list[Descriptor] = [] + for resource in dataset.resources or []: + + def _make_save_file(res: Resource) -> Callable[[SaveFileProps], str]: + def _save_file(props: SaveFileProps) -> str: + ckan_resource = convert_resource_to_ckan( + res.model_dump(by_alias=True, exclude_none=True) + ) + extension = get_file_extension(props.normalized_path) + + upload_payload: dict = { + **ckan_resource.model_dump(by_alias=True, exclude_none=True), + "package_id": dataset_name, + "name": props.denormalized_path, + } + if extension: + upload_payload["format"] = extension.upper() + + stream = load_file_stream(props.normalized_path) + file_data = stream.read() + + upload_result = make_ckan_api_request( + action="resource_create", + payload=upload_payload, + upload=(props.denormalized_path, file_data), + ckan_url=ckan_url, + api_key=api_key, + ) + + return upload_result["url"] + + return _save_file + + resource_descriptors.append( + save_resource_files( + resource, + basepath=basepath, + with_remote=True, + without_folders=True, + save_file=_make_save_file(resource), + ) + ) + + denormalized = denormalize_dataset(dataset, basepath=basepath) + descriptor: Descriptor = { + **denormalized.model_dump(by_alias=True, exclude_none=True), + "resources": resource_descriptors, + } + + descriptor_bytes = stringify_descriptor(descriptor).encode() + + make_ckan_api_request( + action="resource_create", + payload={ + "package_id": dataset_name, + "name": "datapackage.json", + }, + upload=("datapackage.json", descriptor_bytes), + ckan_url=ckan_url, + api_key=api_key, + ) + + return { + "path": result.get("url"), + "dataset_url": dataset_url, + } diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan.py new file mode 100644 index 
0000000..22e602e --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_dataset.plugins.ckan.actions.resource.to_ckan import ( + convert_resource_to_ckan, +) +from fairspec_dataset.plugins.ckan.models.dataset import CkanDataset +from fairspec_dataset.plugins.ckan.models.tag import CkanTag + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + + +def convert_dataset_to_ckan(dataset: Dataset) -> CkanDataset: + title = None + titles = dataset.titles or [] + if titles and titles[0].title: + title = titles[0].title + + notes = None + descriptions = dataset.descriptions or [] + if descriptions and descriptions[0].description: + notes = descriptions[0].description + + version = dataset.version + + license_id = None + license_title = None + license_url = None + rights_list = dataset.rightsList or [] + if rights_list: + rights = rights_list[0] + license_id = rights.rightsIdentifier + license_title = rights.rights + license_url = rights.rightsUri + + author = None + creators = dataset.creators or [] + if creators and creators[0].name: + author = creators[0].name + + maintainer = None + contributors = dataset.contributors or [] + if contributors: + maintainer_obj = next( + (c for c in contributors if c.contributorType == "ContactPerson"), + None, + ) + if maintainer_obj and maintainer_obj.name: + maintainer = maintainer_obj.name + + resource_list = None + resources = dataset.resources or [] + if resources: + resource_list = [ + r + for r in (convert_resource_to_ckan(res) for res in resources) + if r is not None + ] + else: + resource_list = [] + + tags = None + subjects = dataset.subjects or [] + if subjects: + tags = [CkanTag(name=s.subject, display_name=s.subject) for s in subjects] + else: + tags = [] + + metadata_created = None + metadata_modified = None + dates = dataset.dates or [] + created_date = next((d for d in dates if 
d.dateType == "Created"), None) + if created_date: + metadata_created = created_date.date + updated_date = next((d for d in dates if d.dateType == "Updated"), None) + if updated_date: + metadata_modified = updated_date.date + + return CkanDataset( + title=title, + notes=notes, + version=version, + license_id=license_id, + license_title=license_title, + license_url=license_url, + author=author, + maintainer=maintainer, + resources=resource_list, + tags=tags, + metadata_created=metadata_created, + metadata_modified=metadata_modified, + ) diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan_spec.py b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan_spec.py new file mode 100644 index 0000000..76ef3e1 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan_spec.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import json +import os + +from fairspec_metadata import ( + ContributorType, + CreatorNameType, + DateType, + DescriptionType, +) +from fairspec_metadata import Contributor +from fairspec_metadata import Creator +from fairspec_metadata import DataciteDate +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Rights +from fairspec_metadata import Subject +from fairspec_metadata import Title +from fairspec_metadata import Dataset +from fairspec_metadata import Integrity, IntegrityType +from fairspec_metadata import CsvFileDialect +from fairspec_metadata import Resource + +from fairspec_dataset.plugins.ckan.models.dataset import CkanDataset +from .from_ckan import convert_dataset_from_ckan +from .to_ckan import convert_dataset_to_ckan + + +def _load_fixture() -> CkanDataset: + path = os.path.join(os.path.dirname(__file__), "fixtures", "ckan-dataset.json") + with open(path) as f: + return CkanDataset(**json.load(f)) + + +class TestConvertDatasetToCkan: + def test_converts_fairspec_dataset_to_ckan_dataset(self): + dataset = Dataset( + titles=[Title(title="Test Package")], 
+ descriptions=[ + DataciteDescription( + description="This is a test package", + descriptionType=DescriptionType.Abstract, + ) + ], + version="1.0.0", + rightsList=[ + Rights( + rights="Creative Commons Attribution", + rightsUri="http://www.opendefinition.org/licenses/cc-by", + rightsIdentifier="cc-by", + ) + ], + creators=[Creator(name="Test Author", nameType=CreatorNameType.Personal)], + contributors=[ + Contributor( + name="Test Maintainer", + nameType=CreatorNameType.Personal, + contributorType=ContributorType.ContactPerson, + ) + ], + subjects=[ + Subject(subject="test"), + Subject(subject="sample"), + Subject(subject="data"), + ], + dates=[ + DataciteDate(date="2023-01-01T00:00:00Z", dateType=DateType.Created), + DataciteDate(date="2023-01-02T00:00:00Z", dateType=DateType.Updated), + ], + resources=[ + Resource( + name="test_resource", + data="https://example.com/data.csv", + fileDialect=CsvFileDialect(), + descriptions=[ + DataciteDescription( + description="Test resource", + descriptionType=DescriptionType.Abstract, + ) + ], + integrity=Integrity( + type=IntegrityType.md5, hash="1234567890abcdef" + ), + ) + ], + ) + + result = convert_dataset_to_ckan(dataset) + + assert result.title == "Test Package" + assert result.notes == "This is a test package" + assert result.version == "1.0.0" + + assert result.license_id == "cc-by" + assert result.license_title == "Creative Commons Attribution" + assert result.license_url == "http://www.opendefinition.org/licenses/cc-by" + + assert result.author == "Test Author" + assert result.maintainer == "Test Maintainer" + + assert result.tags is not None + assert len(result.tags) == 3 + for i, subject in enumerate(["test", "sample", "data"]): + assert result.tags[i].name == subject + assert result.tags[i].display_name == subject + + assert result.metadata_created == "2023-01-01T00:00:00Z" + assert result.metadata_modified == "2023-01-02T00:00:00Z" + + assert result.resources is not None + assert len(result.resources) == 1 + 
assert result.resources[0].name == "test_resource" + assert result.resources[0].description == "Test resource" + assert result.resources[0].hash == "1234567890abcdef" + + def test_handles_empty_resources_array(self): + dataset = Dataset(resources=[]) + + result = convert_dataset_to_ckan(dataset) + + assert result.resources == [] + + def test_handles_undefined_optional_properties(self): + dataset = Dataset(resources=[]) + + result = convert_dataset_to_ckan(dataset) + + assert result.title is None + assert result.notes is None + assert result.version is None + assert result.metadata_created is None + assert result.metadata_modified is None + assert result.license_id is None + assert result.license_title is None + assert result.license_url is None + assert result.author is None + assert result.maintainer is None + assert result.tags == [] + assert result.resources == [] + + def test_round_trip_ckan_to_dataset_to_ckan(self): + original = _load_fixture() + + dataset = convert_dataset_from_ckan(original) + result = convert_dataset_to_ckan(dataset) + + assert result.title == original.title + assert result.notes == original.notes + assert result.version == original.version + + assert result.license_id == original.license_id + assert result.license_title == original.license_title + assert result.license_url == original.license_url + + assert result.author == original.author + assert result.maintainer == original.maintainer + + assert result.metadata_created == original.metadata_created + assert result.metadata_modified == original.metadata_modified + + assert result.resources is not None + assert len(result.resources) > 0 + + assert result.tags is not None + assert original.tags is not None + assert len(result.tags) == len(original.tags) + for original_tag in original.tags: + matching = [t for t in result.tags if t.name == original_tag.name] + assert len(matching) > 0 diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/resource/__init__.py 
b/dataset/fairspec_dataset/plugins/ckan/actions/resource/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/resource/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/resource/from_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/resource/from_ckan.py new file mode 100644 index 0000000..961a870 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/resource/from_ckan.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +from fairspec_metadata import get_file_name +from fairspec_metadata import DateType, DescriptionType +from fairspec_metadata import DataciteDate +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Integrity, IntegrityType +from fairspec_metadata import Resource + +from fairspec_dataset.plugins.ckan.actions.table_schema.from_ckan import ( + convert_table_schema_from_ckan, +) + +if TYPE_CHECKING: + from fairspec_dataset.plugins.ckan.models.resource import CkanResource + + +def convert_resource_from_ckan(ckan_resource: CkanResource) -> Resource: + name = _convert_name(ckan_resource.name) if ckan_resource.name else None + + descriptions = ( + [ + DataciteDescription( + description=ckan_resource.description, + descriptionType=DescriptionType.Abstract, + ) + ] + if ckan_resource.description + else None + ) + + sizes = [f"{ckan_resource.size} bytes"] if ckan_resource.size else None + + integrity = ( + Integrity(type=IntegrityType.md5, hash=ckan_resource.hash) + if ckan_resource.hash + else None + ) + + dates: list[DataciteDate] = [] + if ckan_resource.created: + dates.append( + DataciteDate(date=ckan_resource.created, dateType=DateType.Created) + ) + if ckan_resource.last_modified: + dates.append( + DataciteDate(date=ckan_resource.last_modified, dateType=DateType.Updated) + ) + + table_schema = ( + convert_table_schema_from_ckan(ckan_resource.schema_) 
+ if ckan_resource.schema_ + else None + ) + + return Resource( + data=ckan_resource.url or "", + name=name, + descriptions=descriptions, + sizes=sizes, + integrity=integrity, + dates=dates if dates else None, + tableSchema=table_schema, + unstable_customMetadata={ + "ckanKey": get_file_name(ckan_resource.url or ""), + "ckanUrl": ckan_resource.url or "", + "ckanId": ckan_resource.id, + }, + ) + + +def _convert_name(name: str) -> str: + result = re.sub(r"[\s.()/\\,\-]+", "_", name).lower() + result = re.sub(r"[^a-z0-9_]", "", result) + result = re.sub(r"^(\d)", r"_\1", result) + return result[:100] diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/resource/to_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/resource/to_ckan.py new file mode 100644 index 0000000..614e87d --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/resource/to_ckan.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import TableSchema + +from fairspec_dataset.plugins.ckan.actions.table_schema.to_ckan import ( + convert_table_schema_to_ckan, +) +from fairspec_dataset.plugins.ckan.models.resource import CkanResource + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def convert_resource_to_ckan(resource: Resource) -> CkanResource: + name = resource.name if resource.name else None + + fmt = None + file_dialect = resource.fileDialect + if not isinstance(file_dialect, str) and file_dialect is not None: + fmt = file_dialect.format + if fmt: + fmt = fmt.upper() + + description = None + descriptions = resource.descriptions or [] + if descriptions and descriptions[0].description: + description = descriptions[0].description + + hash_val = None + integrity = resource.integrity + if integrity and integrity.hash: + hash_val = integrity.hash + + created = None + updated = None + dates = resource.dates or [] + created_date = next((d for d in dates if d.dateType == "Created"), None) + if 
created_date: + created = created_date.date + updated_date = next((d for d in dates if d.dateType == "Updated"), None) + if updated_date: + updated = updated_date.date + + schema = None + table_schema = resource.tableSchema + if isinstance(table_schema, TableSchema): + schema = convert_table_schema_to_ckan(table_schema) + + return CkanResource( + name=name, + format=fmt, + description=description, + hash=hash_val, + created=created, + last_modified=updated, + schema_=schema, + ) diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/__init__.py b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/fixtures/ckan-schema.json b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/fixtures/ckan-schema.json new file mode 100644 index 0000000..9497702 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/fixtures/ckan-schema.json @@ -0,0 +1,115 @@ +{ + "fields": [ + { + "id": "id", + "type": "int", + "info": { + "label": "ID", + "notes": "Unique identifier", + "type_override": "int" + } + }, + { + "id": "name", + "type": "text", + "info": { + "label": "Name", + "notes": "Person's full name", + "type_override": "text" + } + }, + { + "id": "age", + "type": "int" + }, + { + "id": "score", + "type": "numeric", + "info": { + "label": "Score", + "notes": "Test score", + "type_override": "numeric" + } + }, + { + "id": "is_active", + "type": "bool" + }, + { + "id": "birth_date", + "type": "date", + "info": { + "label": "Birth Date", + "notes": "Date of birth", + "type_override": "date" + } + }, + { + "id": "start_time", + "type": "time" + }, + { + "id": "created_at", + "type": "timestamp", + "info": { + "label": "Created At", + "notes": "Timestamp when record was created", + "type_override": 
"timestamp" + } + }, + { + "id": "metadata", + "type": "json" + }, + { + "id": "tags", + "type": "array", + "info": { + "label": "Tags", + "notes": "List of tags", + "type_override": "array" + } + }, + { + "id": "string_field", + "type": "string" + }, + { + "id": "integer_field", + "type": "integer" + }, + { + "id": "number_field", + "type": "number" + }, + { + "id": "float_field", + "type": "float" + }, + { + "id": "boolean_field", + "type": "boolean" + }, + { + "id": "datetime_field", + "type": "datetime" + }, + { + "id": "object_field", + "type": "object" + }, + { + "id": "unknown_field", + "type": "unknown_type" + }, + { + "id": "override_field", + "type": "text", + "info": { + "label": "Override Field", + "notes": "Field with type override", + "type_override": "int" + } + } + ] +} diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan.py new file mode 100644 index 0000000..da5c1b9 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import get_column_properties +from fairspec_metadata import ArrayColumn, ArrayColumnProperty +from fairspec_metadata import BooleanColumn, BooleanColumnProperty +from fairspec_metadata import DateColumn, DateColumnProperty +from fairspec_metadata import ( + DateTimeColumn, + DateTimeColumnProperty, +) +from fairspec_metadata import IntegerColumn, IntegerColumnProperty +from fairspec_metadata import NumberColumn, NumberColumnProperty +from fairspec_metadata import ObjectColumn, ObjectColumnProperty +from fairspec_metadata import StringColumn, StringColumnProperty +from fairspec_metadata import TimeColumn, TimeColumnProperty +from fairspec_metadata import TableSchema + +if TYPE_CHECKING: + from fairspec_metadata import Column + + from fairspec_dataset.plugins.ckan.models.field 
import CkanField + from fairspec_dataset.plugins.ckan.models.schema import CkanSchema + + +def convert_table_schema_from_ckan(ckan_schema: CkanSchema) -> TableSchema: + columns: list[Column] = [] + + for ckan_field in ckan_schema.fields or []: + columns.append(_convert_column(ckan_field)) + + return TableSchema(properties=get_column_properties(columns)) + + +def _convert_column(ckan_field: CkanField) -> Column: + info = ckan_field.info + + base_kwargs: dict = {} + if info: + if info.label: + base_kwargs["title"] = info.label + if info.notes: + base_kwargs["description"] = info.notes + + column_type = ( + (info.type_override if info else None) or ckan_field.type or "text" + ).lower() + + name = ckan_field.id or "" + + match column_type: + case "text" | "string": + return StringColumn( + name=name, + type="string", + property=StringColumnProperty(type="string", **base_kwargs), + ) + case "int" | "integer": + return IntegerColumn( + name=name, + type="integer", + property=IntegerColumnProperty(type="integer", **base_kwargs), + ) + case "numeric" | "number" | "float": + return NumberColumn( + name=name, + type="number", + property=NumberColumnProperty(type="number", **base_kwargs), + ) + case "bool" | "boolean": + return BooleanColumn( + name=name, + type="boolean", + property=BooleanColumnProperty(type="boolean", **base_kwargs), + ) + case "date": + return DateColumn( + name=name, + type="date", + property=DateColumnProperty( + type="string", format="date", **base_kwargs + ), + ) + case "time": + return TimeColumn( + name=name, + type="time", + property=TimeColumnProperty( + type="string", format="time", **base_kwargs + ), + ) + case "timestamp" | "datetime": + return DateTimeColumn( + name=name, + type="date-time", + property=DateTimeColumnProperty( + type="string", format="date-time", **base_kwargs + ), + ) + case "json" | "object": + return ObjectColumn( + name=name, + type="object", + property=ObjectColumnProperty(type="object", **base_kwargs), + ) + case "array": 
+ return ArrayColumn( + name=name, + type="array", + property=ArrayColumnProperty(type="array", **base_kwargs), + ) + case _: + return StringColumn( + name=name, + type="string", + property=StringColumnProperty(type="string", **base_kwargs), + ) diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan_spec.py b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan_spec.py new file mode 100644 index 0000000..0a6f468 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan_spec.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import json +import os + +from fairspec_dataset.plugins.ckan.models.field import CkanField +from fairspec_dataset.plugins.ckan.models.schema import CkanSchema +from .from_ckan import convert_table_schema_from_ckan + + +def _load_fixture() -> CkanSchema: + path = os.path.join(os.path.dirname(__file__), "fixtures", "ckan-schema.json") + with open(path) as f: + return CkanSchema(**json.load(f)) + + +class TestConvertTableSchemaFromCkan: + def test_converts_ckan_schema_to_fairspec_table_schema(self): + ckan_schema = _load_fixture() + + result = convert_table_schema_from_ckan(ckan_schema) + + assert result.properties is not None + properties = result.properties + assert ckan_schema.fields is not None + assert len(properties) == len(ckan_schema.fields) + + id_col = properties["id"] + assert id_col.type == "integer" + assert id_col.title == "ID" + assert id_col.description == "Unique identifier" + + name_col = properties["name"] + assert name_col.type == "string" + assert name_col.title == "Name" + assert name_col.description == "Person's full name" + + age_col = properties["age"] + assert age_col.type == "integer" + assert age_col.title is None + assert age_col.description is None + + score_col = properties["score"] + assert score_col.type == "number" + assert score_col.title == "Score" + assert score_col.description == "Test score" + + is_active_col = 
properties["is_active"] + assert is_active_col.type == "boolean" + + birth_date_col = properties["birth_date"] + assert birth_date_col.type == "string" + assert birth_date_col.format == "date" + assert birth_date_col.title == "Birth Date" + assert birth_date_col.description == "Date of birth" + + start_time_col = properties["start_time"] + assert start_time_col.type == "string" + assert start_time_col.format == "time" + + created_at_col = properties["created_at"] + assert created_at_col.type == "string" + assert created_at_col.format == "date-time" + assert created_at_col.title == "Created At" + assert created_at_col.description == "Timestamp when record was created" + + metadata_col = properties["metadata"] + assert metadata_col.type == "object" + + tags_col = properties["tags"] + assert tags_col.type == "array" + assert tags_col.title == "Tags" + assert tags_col.description == "List of tags" + + def test_converts_ckan_type_aliases(self): + ckan_schema = _load_fixture() + + result = convert_table_schema_from_ckan(ckan_schema) + assert result.properties is not None + properties = result.properties + + assert properties["string_field"].type == "string" + assert properties["integer_field"].type == "integer" + assert properties["number_field"].type == "number" + assert properties["float_field"].type == "number" + assert properties["boolean_field"].type == "boolean" + assert properties["datetime_field"].type == "string" + assert properties["datetime_field"].format == "date-time" + assert properties["object_field"].type == "object" + + def test_handles_unknown_field_types(self): + ckan_schema = _load_fixture() + + result = convert_table_schema_from_ckan(ckan_schema) + assert result.properties is not None + properties = result.properties + + assert properties["unknown_field"].type == "string" + + def test_respects_type_override_in_field_info(self): + ckan_schema = _load_fixture() + + result = convert_table_schema_from_ckan(ckan_schema) + assert result.properties is not 
None + properties = result.properties + + override_col = properties["override_field"] + assert override_col.type == "integer" + assert override_col.title == "Override Field" + assert override_col.description == "Field with type override" + + def test_handles_empty_fields_array(self): + ckan_schema = CkanSchema(fields=[]) + + result = convert_table_schema_from_ckan(ckan_schema) + + assert result.properties is None or len(result.properties) == 0 + + def test_handles_fields_without_info_object(self): + ckan_schema = CkanSchema(fields=[CkanField(id="simple_field", type="text")]) + + result = convert_table_schema_from_ckan(ckan_schema) + assert result.properties is not None + properties = result.properties + + assert len(properties) == 1 + col = properties["simple_field"] + assert col.type == "string" + assert col.title is None + assert col.description is None + + def test_handles_case_insensitive_type_conversion(self): + ckan_schema = CkanSchema( + fields=[ + CkanField(id="field1", type="TEXT"), + CkanField(id="field2", type="INT"), + CkanField(id="field3", type="BOOL"), + CkanField(id="field4", type="TIMESTAMP"), + ], + ) + + result = convert_table_schema_from_ckan(ckan_schema) + assert result.properties is not None + properties = result.properties + + assert properties["field1"].type == "string" + assert properties["field2"].type == "integer" + assert properties["field3"].type == "boolean" + assert properties["field4"].type == "string" + assert properties["field4"].format == "date-time" diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan.py b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan.py new file mode 100644 index 0000000..59b22f3 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import get_columns + +from fairspec_dataset.plugins.ckan.models.field import 
CkanField, CkanFieldInfo +from fairspec_dataset.plugins.ckan.models.schema import CkanSchema + +if TYPE_CHECKING: + from fairspec_metadata import Column + from fairspec_metadata import TableSchema + + +def convert_table_schema_to_ckan(table_schema: TableSchema) -> CkanSchema: + fields: list[CkanField] = [] + + columns = get_columns(table_schema.model_dump(by_alias=True, exclude_none=True)) + for column in columns: + fields.append(_convert_column(column)) + + return CkanSchema(fields=fields) + + +def _convert_column(column: Column) -> CkanField: + title = column.property.title + description = column.property.description + + info = None + if title or description: + info = CkanFieldInfo( + label=title if title else None, + notes=description if description else None, + type_override=_convert_type(column), + ) + + return CkanField( + id=column.name, + type=_convert_type(column), + info=info, + ) + + +def _convert_type(column: Column) -> str: + match column.type: + case "string": + return "text" + case "integer": + return "int" + case "number": + return "numeric" + case "boolean": + return "bool" + case "date": + return "date" + case "time": + return "time" + case "date-time": + return "timestamp" + case "object": + return "json" + case "array": + return "array" + case _: + return "text" diff --git a/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan_spec.py b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan_spec.py new file mode 100644 index 0000000..2797f25 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan_spec.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +import json +import os + +from fairspec_metadata import ArrayColumnProperty +from fairspec_metadata import BooleanColumnProperty +from fairspec_metadata import DateColumnProperty +from fairspec_metadata import DateTimeColumnProperty +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import 
NumberColumnProperty +from fairspec_metadata import ObjectColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TimeColumnProperty +from fairspec_metadata import UnknownColumnProperty +from fairspec_metadata import TableSchema + +from fairspec_dataset.plugins.ckan.models.schema import CkanSchema +from .from_ckan import convert_table_schema_from_ckan +from .to_ckan import convert_table_schema_to_ckan + + +def _load_fixture() -> CkanSchema: + path = os.path.join(os.path.dirname(__file__), "fixtures", "ckan-schema.json") + with open(path) as f: + return CkanSchema(**json.load(f)) + + +class TestConvertTableSchemaToCkan: + def test_converts_fairspec_schema_to_ckan_schema(self): + schema = TableSchema( + properties={ + "id": IntegerColumnProperty( + type="integer", + title="ID", + description="Unique identifier", + ), + "name": StringColumnProperty( + type="string", + title="Name", + description="Person's full name", + ), + "age": IntegerColumnProperty(), + "score": NumberColumnProperty( + type="number", + title="Score", + description="Test score", + ), + "is_active": BooleanColumnProperty(), + "birth_date": DateColumnProperty( + type="string", + format="date", + title="Birth Date", + description="Date of birth", + ), + "start_time": TimeColumnProperty(), + "created_at": DateTimeColumnProperty( + type="string", + format="date-time", + title="Created At", + description="Timestamp when record was created", + ), + "metadata": ObjectColumnProperty(), + "tags": ArrayColumnProperty( + type="array", + title="Tags", + description="List of tags", + ), + } + ) + + result = convert_table_schema_to_ckan(schema) + + assert schema.properties is not None + assert result.fields is not None + assert len(result.fields) == len(schema.properties) + + id_field = next(f for f in result.fields if f.id == "id") + assert id_field.type == "int" + assert id_field.info is not None + assert id_field.info.label == "ID" + assert id_field.info.notes == 
"Unique identifier" + assert id_field.info.type_override == "int" + + name_field = next(f for f in result.fields if f.id == "name") + assert name_field.type == "text" + assert name_field.info is not None + assert name_field.info.label == "Name" + assert name_field.info.notes == "Person's full name" + assert name_field.info.type_override == "text" + + age_field = next(f for f in result.fields if f.id == "age") + assert age_field.type == "int" + assert age_field.info is None + + score_field = next(f for f in result.fields if f.id == "score") + assert score_field.type == "numeric" + assert score_field.info is not None + assert score_field.info.label == "Score" + assert score_field.info.notes == "Test score" + assert score_field.info.type_override == "numeric" + + is_active_field = next(f for f in result.fields if f.id == "is_active") + assert is_active_field.type == "bool" + assert is_active_field.info is None + + birth_date_field = next(f for f in result.fields if f.id == "birth_date") + assert birth_date_field.type == "date" + assert birth_date_field.info is not None + assert birth_date_field.info.label == "Birth Date" + assert birth_date_field.info.notes == "Date of birth" + assert birth_date_field.info.type_override == "date" + + start_time_field = next(f for f in result.fields if f.id == "start_time") + assert start_time_field.type == "time" + assert start_time_field.info is None + + created_at_field = next(f for f in result.fields if f.id == "created_at") + assert created_at_field.type == "timestamp" + assert created_at_field.info is not None + assert created_at_field.info.label == "Created At" + assert created_at_field.info.notes == "Timestamp when record was created" + assert created_at_field.info.type_override == "timestamp" + + metadata_field = next(f for f in result.fields if f.id == "metadata") + assert metadata_field.type == "json" + assert metadata_field.info is None + + tags_field = next(f for f in result.fields if f.id == "tags") + assert 
tags_field.type == "array" + assert tags_field.info is not None + assert tags_field.info.label == "Tags" + assert tags_field.info.notes == "List of tags" + assert tags_field.info.type_override == "array" + + def test_handles_columns_with_only_title(self): + schema = TableSchema( + properties={"field1": StringColumnProperty(title="Field 1")} + ) + + result = convert_table_schema_to_ckan(schema) + + assert result.fields is not None + assert len(result.fields) == 1 + field = result.fields[0] + assert field.id == "field1" + assert field.type == "text" + assert field.info is not None + assert field.info.label == "Field 1" + assert field.info.notes is None + assert field.info.type_override == "text" + + def test_handles_columns_with_only_description(self): + schema = TableSchema( + properties={ + "field1": StringColumnProperty( + type="string", description="Field 1 description" + ) + } + ) + + result = convert_table_schema_to_ckan(schema) + + assert result.fields is not None + assert len(result.fields) == 1 + field = result.fields[0] + assert field.id == "field1" + assert field.type == "text" + assert field.info is not None + assert field.info.label is None + assert field.info.notes == "Field 1 description" + assert field.info.type_override == "text" + + def test_handles_columns_without_title_or_description(self): + schema = TableSchema(properties={"simple_field": StringColumnProperty()}) + + result = convert_table_schema_to_ckan(schema) + + assert result.fields is not None + assert len(result.fields) == 1 + field = result.fields[0] + assert field.id == "simple_field" + assert field.type == "text" + assert field.info is None + + def test_handles_empty_properties(self): + schema = TableSchema(properties={}) + + result = convert_table_schema_to_ckan(schema) + + assert result.fields == [] + + def test_converts_unmapped_types_to_text(self): + schema = TableSchema( + properties={"null_field": UnknownColumnProperty(type="null")} + ) + + result = 
convert_table_schema_to_ckan(schema) + + assert result.fields is not None + assert len(result.fields) == 1 + assert result.fields[0].type == "text" + + def test_round_trip_ckan_to_fairspec_to_ckan(self): + original = _load_fixture() + + fairspec_schema = convert_table_schema_from_ckan(original) + assert fairspec_schema.properties is not None + + result = convert_table_schema_to_ckan(fairspec_schema) + + assert result.fields is not None + assert original.fields is not None + assert len(result.fields) == len(original.fields) + + for original_field in original.fields: + result_field = next( + (f for f in result.fields if f.id == original_field.id), None + ) + assert result_field is not None + + if original_field.info: + assert result_field.info is not None + if original_field.info.label: + assert result_field.info.label == original_field.info.label + if original_field.info.notes: + assert result_field.info.notes == original_field.info.notes diff --git a/dataset/fairspec_dataset/plugins/ckan/models/__init__.py b/dataset/fairspec_dataset/plugins/ckan/models/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/models/dataset.py b/dataset/fairspec_dataset/plugins/ckan/models/dataset.py new file mode 100644 index 0000000..c47c47a --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/dataset.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .organization import CkanOrganization +from .resource import CkanResource +from .tag import CkanTag + + +class CkanDataset(FairspecModel): + resources: list[CkanResource] | None = None + organization: CkanOrganization | None = None + tags: list[CkanTag] | None = None + id: str | None = None + name: str | None = None + title: str | None = None + notes: str | None = None + version: str | None = None + license_id: 
str | None = None + license_title: str | None = None + license_url: str | None = None + author: str | None = None + author_email: str | None = None + maintainer: str | None = None + maintainer_email: str | None = None + metadata_created: str | None = None + metadata_modified: str | None = None diff --git a/dataset/fairspec_dataset/plugins/ckan/models/field.py b/dataset/fairspec_dataset/plugins/ckan/models/field.py new file mode 100644 index 0000000..e085509 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/field.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class CkanFieldInfo(FairspecModel): + label: str | None = None + notes: str | None = None + type_override: str | None = None + + +class CkanField(FairspecModel): + id: str | None = None + type: str | None = None + info: CkanFieldInfo | None = None diff --git a/dataset/fairspec_dataset/plugins/ckan/models/organization.py b/dataset/fairspec_dataset/plugins/ckan/models/organization.py new file mode 100644 index 0000000..4637e82 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/organization.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class CkanOrganization(FairspecModel): + id: str | None = None + name: str | None = None + title: str | None = None + description: str | None = None diff --git a/dataset/fairspec_dataset/plugins/ckan/models/resource.py b/dataset/fairspec_dataset/plugins/ckan/models/resource.py new file mode 100644 index 0000000..a111195 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/resource.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from pydantic import ConfigDict, Field + +from fairspec_metadata.models.base import FairspecModel + +from .schema import CkanSchema + + +class CkanResource(FairspecModel): + model_config = ConfigDict(populate_by_name=True) + + id: str | None = None + url: str | 
None = None + name: str | None = None + created: str | None = None + description: str | None = None + format: str | None = None + hash: str | None = None + last_modified: str | None = None + metadata_modified: str | None = None + mimetype: str | None = None + size: int | None = None + schema_: CkanSchema | None = Field(default=None, alias="schema") diff --git a/dataset/fairspec_dataset/plugins/ckan/models/schema.py b/dataset/fairspec_dataset/plugins/ckan/models/schema.py new file mode 100644 index 0000000..0966a2a --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/schema.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .field import CkanField + + +class CkanSchema(FairspecModel): + fields: list[CkanField] | None = None diff --git a/dataset/fairspec_dataset/plugins/ckan/models/tag.py b/dataset/fairspec_dataset/plugins/ckan/models/tag.py new file mode 100644 index 0000000..ae32b27 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/models/tag.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class CkanTag(FairspecModel): + id: str | None = None + name: str | None = None + display_name: str | None = None diff --git a/dataset/fairspec_dataset/plugins/ckan/plugin.py b/dataset/fairspec_dataset/plugins/ckan/plugin.py new file mode 100644 index 0000000..c824db9 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/plugin.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import get_is_remote_path + +from fairspec_dataset.plugin import DatasetPlugin +from .actions.dataset.load import load_dataset_from_ckan + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +class CkanPlugin(DatasetPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + if not _get_is_ckan(source): + return None + return 
load_dataset_from_ckan(source) + + +def _get_is_ckan(path: str) -> bool: + if not get_is_remote_path(path): + return False + return "/dataset/" in path diff --git a/dataset/fairspec_dataset/plugins/ckan/plugin_spec.py b/dataset/fairspec_dataset/plugins/ckan/plugin_spec.py new file mode 100644 index 0000000..c535690 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/plugin_spec.py @@ -0,0 +1,70 @@ +from unittest.mock import MagicMock, patch + +from .plugin import CkanPlugin + + +class TestLoadDataset: + def setup_method(self): + self.plugin = CkanPlugin() + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_loads_from_ckan_url(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset("https://demo.ckan.org/dataset/my-dataset") + mock_load.assert_called_once_with("https://demo.ckan.org/dataset/my-dataset") + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_returns_none_for_url_without_dataset_path(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://demo.ckan.org/about") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_returns_none_for_local_paths(self, mock_load: MagicMock): + result = self.plugin.load_dataset("/tmp/data") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_returns_none_for_github_urls(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://github.com/owner/repo") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_handles_additional_path_segments(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset( + 
"https://demo.ckan.org/en_GB/dataset/my-dataset" + ) + mock_load.assert_called_once() + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_handles_query_parameters(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset( + "https://demo.ckan.org/dataset/my-dataset?page=1" + ) + mock_load.assert_called_once() + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_handles_http_urls(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset("http://demo.ckan.org/dataset/my-dataset") + mock_load.assert_called_once() + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_returns_none_for_zenodo_urls(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://zenodo.org/records/123456") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.ckan.plugin.load_dataset_from_ckan") + def test_returns_none_for_dataset_in_query_params_only(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://example.com/search?name=dataset") + mock_load.assert_not_called() + assert result is None diff --git a/dataset/fairspec_dataset/plugins/ckan/services/__init__.py b/dataset/fairspec_dataset/plugins/ckan/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/services/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/ckan/services/ckan.py b/dataset/fairspec_dataset/plugins/ckan/services/ckan.py new file mode 100644 index 0000000..10ddb23 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/ckan/services/ckan.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import json +import urllib.parse +import urllib.request +from 
typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +def make_ckan_api_request( + *, + ckan_url: str, + action: str, + payload: Descriptor, + upload: tuple[str, bytes] | None = None, + api_key: str | None = None, +) -> dict: + parsed = urllib.parse.urlparse(ckan_url) + url = f"{parsed.scheme}://{parsed.netloc}/api/3/action/{action}" + + headers: dict[str, str] = {} + if api_key: + headers["Authorization"] = api_key + + if upload: + file_name, file_data = upload + boundary = "----FormBoundary7MA4YWxkTrZu0gW" + headers["Content-Type"] = f"multipart/form-data; boundary={boundary}" + + body = b"" + for key, value in payload.items(): + body += f"--{boundary}\r\n".encode() + body += f'Content-Disposition: form-data; name="{key}"\r\n\r\n'.encode() + body += f"{value}\r\n".encode() + + body += f"--{boundary}\r\n".encode() + body += f'Content-Disposition: form-data; name="upload"; filename="{file_name}"\r\n'.encode() + body += b"Content-Type: application/octet-stream\r\n\r\n" + body += file_data + body += f"\r\n--{boundary}--\r\n".encode() + else: + headers["Content-Type"] = "application/json" + body = json.dumps(payload).encode() + + request = urllib.request.Request(url, data=body, headers=headers, method="POST") + + with urllib.request.urlopen(request) as response: + data = json.loads(response.read().decode()) + + if not data.get("success"): + raise Exception(f"CKAN API error: {data.get('error')}") + + return data["result"] diff --git a/dataset/fairspec_dataset/plugins/descriptor/__init__.py b/dataset/fairspec_dataset/plugins/descriptor/__init__.py new file mode 100644 index 0000000..77b626f --- /dev/null +++ b/dataset/fairspec_dataset/plugins/descriptor/__init__.py @@ -0,0 +1,3 @@ +from .plugin import DescriptorPlugin + +__all__ = ["DescriptorPlugin"] diff --git a/dataset/fairspec_dataset/plugins/descriptor/plugin.py b/dataset/fairspec_dataset/plugins/descriptor/plugin.py new file mode 100644 index 0000000..61e736a --- 
/dev/null +++ b/dataset/fairspec_dataset/plugins/descriptor/plugin.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import load_dataset_descriptor +from fairspec_metadata import save_dataset_descriptor +from fairspec_metadata import get_file_extension, get_is_remote_path + +from fairspec_dataset.models.dataset import SaveDatasetResult +from fairspec_dataset.plugin import DatasetPlugin + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + + from fairspec_dataset.models.dataset import SaveDatasetOptions + + +class DescriptorPlugin(DatasetPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + if not _get_is_json(source): + return None + dataset = load_dataset_descriptor(source) + return dataset.model_dump(by_alias=True, exclude_none=True) + + def save_dataset( + self, dataset: Dataset, **options: Unpack[SaveDatasetOptions] + ) -> SaveDatasetResult | None: + target = options["target"] + if not _get_is_local_json(target): + return None + if not target.endswith("datapackage.json"): + return None + save_dataset_descriptor(dataset, path=target) + return SaveDatasetResult(path=target) + + +def _get_is_json(path: str) -> bool: + return get_file_extension(path) == "json" + + +def _get_is_local_json(path: str) -> bool: + return _get_is_json(path) and not get_is_remote_path(path) diff --git a/dataset/fairspec_dataset/plugins/descriptor/plugin_spec.py b/dataset/fairspec_dataset/plugins/descriptor/plugin_spec.py new file mode 100644 index 0000000..a8af5ae --- /dev/null +++ b/dataset/fairspec_dataset/plugins/descriptor/plugin_spec.py @@ -0,0 +1,160 @@ +from unittest.mock import MagicMock, patch + +from fairspec_metadata import Dataset +from fairspec_metadata import Resource + +from .plugin import DescriptorPlugin + + +class TestLoadDataset: + def setup_method(self): + self.plugin = DescriptorPlugin() + + 
@patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_loads_from_local_datapackage_json(self, mock_load: MagicMock): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = { + "resources": [{"name": "test", "data": []}] + } + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset("./datapackage.json") + + mock_load.assert_called_once_with("./datapackage.json") + assert result == {"resources": [{"name": "test", "data": []}]} + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_loads_from_local_json(self, mock_load: MagicMock): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = { + "resources": [{"name": "test", "data": []}] + } + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset("./dataset.json") + + mock_load.assert_called_once_with("./dataset.json") + assert result == {"resources": [{"name": "test", "data": []}]} + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_loads_from_absolute_path(self, mock_load: MagicMock): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = { + "resources": [{"name": "test", "data": []}] + } + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset("/absolute/path/datapackage.json") + + mock_load.assert_called_once_with("/absolute/path/datapackage.json") + assert result == {"resources": [{"name": "test", "data": []}]} + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_returns_none_for_csv(self, mock_load: MagicMock): + result = self.plugin.load_dataset("./data.csv") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_returns_none_for_xlsx(self, mock_load: MagicMock): + result = self.plugin.load_dataset("./data.xlsx") + + mock_load.assert_not_called() + assert result is 
None + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_returns_none_for_parquet(self, mock_load: MagicMock): + result = self.plugin.load_dataset("./data.parquet") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.load_dataset_descriptor") + def test_returns_none_for_zenodo_url(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://zenodo.org/record/123") + + mock_load.assert_not_called() + assert result is None + + +class TestSaveDataset: + def setup_method(self): + self.plugin = DescriptorPlugin() + self.dataset = Dataset(resources=[Resource(name="test", data=[])]) + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_saves_to_local_datapackage_json(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="./datapackage.json") + + mock_save.assert_called_once_with(self.dataset, path="./datapackage.json") + assert result is not None + assert result.path == "./datapackage.json" + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_saves_with_absolute_path(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="/absolute/path/datapackage.json" + ) + + mock_save.assert_called_once_with( + self.dataset, path="/absolute/path/datapackage.json" + ) + assert result is not None + assert result.path == "/absolute/path/datapackage.json" + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_remote_https(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="https://example.com/datapackage.json" + ) + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_remote_http(self, mock_save: MagicMock): + result = 
self.plugin.save_dataset( + self.dataset, target="http://example.com/datapackage.json" + ) + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_non_datapackage_json(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="./dataset.json") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_csv(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="./data.csv") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_xlsx(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="./data.xlsx") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_returns_none_for_directory(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="./data") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.descriptor.plugin.save_dataset_descriptor") + def test_ignores_with_remote_option(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="./datapackage.json", with_remote=True + ) + + mock_save.assert_called_once_with(self.dataset, path="./datapackage.json") + assert result is not None + assert result.path == "./datapackage.json" diff --git a/dataset/fairspec_dataset/plugins/folder/__init__.py b/dataset/fairspec_dataset/plugins/folder/__init__.py new file mode 100644 index 0000000..54ba1a3 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/__init__.py @@ -0,0 +1,9 @@ +from .actions.dataset.load import load_dataset_from_folder +from .actions.dataset.save 
import save_dataset_to_folder +from .plugin import FolderPlugin + +__all__ = [ + "FolderPlugin", + "load_dataset_from_folder", + "save_dataset_to_folder", +] diff --git a/dataset/fairspec_dataset/plugins/folder/actions/__init__.py b/dataset/fairspec_dataset/plugins/folder/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataset/fairspec_dataset/plugins/folder/actions/dataset/__init__.py b/dataset/fairspec_dataset/plugins/folder/actions/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataset/fairspec_dataset/plugins/folder/actions/dataset/load.py b/dataset/fairspec_dataset/plugins/folder/actions/dataset/load.py new file mode 100644 index 0000000..3306dd7 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/actions/dataset/load.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import os + +from fairspec_metadata import load_dataset_descriptor +from fairspec_metadata import Dataset + + +def load_dataset_from_folder(folder_path: str) -> Dataset: + return load_dataset_descriptor(os.path.join(folder_path, "dataset.json")) diff --git a/dataset/fairspec_dataset/plugins/folder/actions/dataset/load_spec.py b/dataset/fairspec_dataset/plugins/folder/actions/dataset/load_spec.py new file mode 100644 index 0000000..44d2de7 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/actions/dataset/load_spec.py @@ -0,0 +1,154 @@ +import os + +import pytest +from fairspec_metadata import DescriptionType +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Title +from fairspec_metadata import Dataset +from fairspec_metadata import CsvFileDialect +from fairspec_metadata import Resource +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TableSchema + +from fairspec_dataset.actions.file.temp import get_temp_file_path, write_temp_file +from fairspec_dataset.actions.folder.temp import 
get_temp_folder_path +from .load import load_dataset_from_folder +from .save import save_dataset_to_folder + + +class TestLoadDatasetFromFolder: + def test_loads_basic_dataset_from_folder(self): + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert len(result.resources) == 1 + + def test_loads_dataset_with_metadata(self): + folder = get_temp_file_path() + dataset = Dataset( + titles=[Title(title="Test Dataset")], + descriptions=[ + DataciteDescription( + description="A test", descriptionType=DescriptionType.Abstract + ) + ], + version="1.0", + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.titles is not None + assert result.titles[0].title == "Test Dataset" + assert result.descriptions is not None + assert result.descriptions[0].description == "A test" + assert result.version == "1.0" + + def test_loads_dataset_with_inline_data_resources(self): + folder = get_temp_file_path() + data = [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}] + dataset = Dataset(resources=[Resource(name="test_res", data=data)]) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert result.resources[0].data == data + + def test_loads_dataset_with_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(), + ) + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert 
result.resources[0].fileDialect is not None + + def test_loads_dataset_with_table_schema(self): + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=[{"id": 1}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert isinstance(result.resources[0].tableSchema, TableSchema) + assert result.resources[0].tableSchema.properties is not None + assert len(result.resources[0].tableSchema.properties) == 2 + + def test_loads_dataset_with_multiple_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource(name="file_res", data=csv_path), + Resource(name="inline_res", data=[{"id": 1}]), + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert len(result.resources) == 2 + assert result.resources[0].name == "file_res" + assert result.resources[1].name == "inline_res" + + def test_loads_dataset_with_delimiter(self): + csv_path = write_temp_file("id;name\n1;alice\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(delimiter=";"), + ) + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert isinstance(result.resources[0].fileDialect, CsvFileDialect) + assert result.resources[0].fileDialect.delimiter == ";" + + def test_throws_error_for_non_existent_folder(self): + with pytest.raises(Exception): + load_dataset_from_folder("/non/existent/folder") + + def test_throws_error_for_folder_without_dataset_json(self): 
+ folder = get_temp_folder_path() + os.makedirs(folder, exist_ok=True) + with pytest.raises(Exception): + load_dataset_from_folder(folder) diff --git a/dataset/fairspec_dataset/plugins/folder/actions/dataset/save.py b/dataset/fairspec_dataset/plugins/folder/actions/dataset/save.py new file mode 100644 index 0000000..d1c6446 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/actions/dataset/save.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +from fairspec_metadata import denormalize_dataset +from fairspec_metadata import save_descriptor + +from fairspec_dataset.actions.dataset.basepath import get_dataset_basepath +from fairspec_dataset.actions.file.copy import copy_file +from fairspec_dataset.actions.file.path import assert_local_path_vacant +from fairspec_dataset.actions.folder.create import create_folder +from fairspec_dataset.actions.resource.save import SaveFileProps, save_resource_files + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + + +def save_dataset_to_folder( + dataset: Dataset, + *, + folder_path: str, + with_remote: bool = False, +) -> Descriptor: + basepath = get_dataset_basepath(dataset) + + assert_local_path_vacant(folder_path) + create_folder(folder_path) + + resource_descriptors: list[Descriptor] = [] + for resource in dataset.resources or []: + resource_descriptors.append( + save_resource_files( + resource, + basepath=basepath, + with_remote=with_remote, + save_file=_make_save_file(folder_path), + ) + ) + + denormalized = denormalize_dataset(dataset, basepath=basepath) + descriptor: Descriptor = { + **denormalized.model_dump(by_alias=True, exclude_none=True), + "resources": resource_descriptors, + } + + save_descriptor(descriptor, path=os.path.join(folder_path, "dataset.json")) + + return descriptor + + +def _make_save_file(folder_path: str): + def save_file(props: SaveFileProps) -> str: + copy_file( + 
source_path=props.normalized_path, + target_path=os.path.join(folder_path, props.denormalized_path), + ) + return props.denormalized_path + + return save_file diff --git a/dataset/fairspec_dataset/plugins/folder/actions/dataset/save_spec.py b/dataset/fairspec_dataset/plugins/folder/actions/dataset/save_spec.py new file mode 100644 index 0000000..1d12f5d --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/actions/dataset/save_spec.py @@ -0,0 +1,257 @@ +import json +import os + +import pytest +from fairspec_metadata import DescriptionType +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Title +from fairspec_metadata import Dataset +from fairspec_metadata import CsvFileDialect +from fairspec_metadata import Resource +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TableSchema + +from fairspec_dataset.actions.file.temp import get_temp_file_path, write_temp_file +from .load import load_dataset_from_folder +from .save import save_dataset_to_folder + + +class TestSaveDatasetToFolder: + def test_saves_basic_dataset_to_folder(self): + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_metadata(self): + folder = get_temp_file_path() + dataset = Dataset( + titles=[Title(title="Test Dataset")], + descriptions=[ + DataciteDescription( + description="A test", descriptionType=DescriptionType.Abstract + ) + ], + version="1.0", + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_inline_data_resources(self): + folder = get_temp_file_path() + dataset = Dataset( + 
resources=[Resource(name="test_res", data=[{"id": 1}, {"id": 2}])] + ) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=csv_path)]) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_multiple_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource(name="file_res", data=csv_path), + Resource(name="inline_res", data=[{"id": 1}]), + ] + ) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_table_schema(self): + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=[{"id": 1}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_saves_dataset_with_delimiter(self): + csv_path = write_temp_file("id;name\n1;alice\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(delimiter=";"), + ) + ] + ) + + save_dataset_to_folder(dataset, folder_path=folder) + + assert os.path.exists(os.path.join(folder, "dataset.json")) + + def test_roundtrip_preserves_structure(self): + folder = get_temp_file_path() + dataset = Dataset( + titles=[Title(title="My Dataset")], + descriptions=[ + DataciteDescription( + description="Desc", 
descriptionType=DescriptionType.Abstract + ) + ], + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.titles is not None + assert result.titles[0].title == "My Dataset" + assert result.descriptions is not None + assert result.descriptions[0].description == "Desc" + assert result.resources is not None + assert len(result.resources) == 1 + + def test_roundtrip_preserves_metadata(self): + folder = get_temp_file_path() + dataset = Dataset( + titles=[Title(title="My Dataset")], + version="2.0", + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.titles is not None + assert result.titles[0].title == "My Dataset" + assert result.version == "2.0" + + def test_roundtrip_preserves_table_schema(self): + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=[{"id": 1}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert isinstance(result.resources[0].tableSchema, TableSchema) + assert result.resources[0].tableSchema.properties is not None + assert len(result.resources[0].tableSchema.properties) == 2 + + def test_roundtrip_preserves_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(), + ) + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert 
result.resources[0].fileDialect is not None + + def test_throws_error_for_existing_folder(self): + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_folder(dataset, folder_path=folder) + + with pytest.raises(FileExistsError): + save_dataset_to_folder(dataset, folder_path=folder) + + def test_creates_valid_folder_structure(self): + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert result.resources[0].name == "test_res" + + def test_roundtrip_with_multiple_file_resources(self): + csv1 = write_temp_file("id,name\n1,alice\n", filename="data1.csv") + csv2 = write_temp_file("id,name\n2,bob\n", filename="data2.csv") + folder = get_temp_file_path() + dataset = Dataset( + resources=[ + Resource(name="first_res", data=csv1), + Resource(name="second_res", data=csv2), + ] + ) + save_dataset_to_folder(dataset, folder_path=folder) + + result = load_dataset_from_folder(folder) + + assert result.resources is not None + assert len(result.resources) == 2 + assert result.resources[0].name == "first_res" + assert result.resources[1].name == "second_res" + + def test_creates_dataset_json_in_folder(self): + folder = get_temp_file_path() + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_folder(dataset, folder_path=folder) + + dataset_json_path = os.path.join(folder, "dataset.json") + with open(dataset_json_path, encoding="utf-8") as f: + descriptor = json.load(f) + + assert "resources" in descriptor + assert len(descriptor["resources"]) == 1 + + def test_copies_file_resources_to_folder(self): + csv_content = "id,name\n1,alice\n2,bob\n" + csv_path = write_temp_file(csv_content, filename="data.csv") + folder = get_temp_file_path() + dataset = 
Dataset(resources=[Resource(name="test_res", data=csv_path)]) + save_dataset_to_folder(dataset, folder_path=folder) + + copied_path = os.path.join(folder, "data.csv") + assert os.path.exists(copied_path) + with open(copied_path, encoding="utf-8") as f: + assert f.read() == csv_content diff --git a/dataset/fairspec_dataset/plugins/folder/plugin.py b/dataset/fairspec_dataset/plugins/folder/plugin.py new file mode 100644 index 0000000..7ac67cd --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/plugin.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import get_is_remote_path + +from fairspec_dataset.models.dataset import SaveDatasetResult +from fairspec_dataset.plugin import DatasetPlugin +from .actions.dataset.load import load_dataset_from_folder +from .actions.dataset.save import save_dataset_to_folder + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + + from fairspec_dataset.models.dataset import SaveDatasetOptions + + +class FolderPlugin(DatasetPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + if not _get_is_folder(source): + return None + dataset = load_dataset_from_folder(source) + return dataset.model_dump(by_alias=True, exclude_none=True) + + def save_dataset( + self, dataset: Dataset, **options: Unpack[SaveDatasetOptions] + ) -> SaveDatasetResult | None: + target = options["target"] + if not _get_is_folder(target): + return None + save_dataset_to_folder( + dataset, folder_path=target, with_remote=bool(options.get("with_remote")) + ) + return SaveDatasetResult(path=target) + + +def _get_is_folder(path: str) -> bool: + if get_is_remote_path(path): + return False + try: + return os.path.isdir(path) + except Exception: + return False diff --git a/dataset/fairspec_dataset/plugins/folder/plugin_spec.py b/dataset/fairspec_dataset/plugins/folder/plugin_spec.py new file mode 100644 index 
0000000..918d218 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/folder/plugin_spec.py @@ -0,0 +1,114 @@ +from unittest.mock import MagicMock, patch + +from fairspec_metadata import Dataset +from fairspec_metadata import Resource + +from .plugin import FolderPlugin + + +class TestLoadDataset: + def setup_method(self): + self.plugin = FolderPlugin() + + @patch("fairspec_dataset.plugins.folder.plugin.os.path.isdir", return_value=True) + @patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_loads_from_local_directory( + self, mock_load: MagicMock, _mock_isdir: MagicMock + ): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = { + "resources": [{"name": "test", "data": []}] + } + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset(".") + + mock_load.assert_called_once_with(".") + assert result == {"resources": [{"name": "test", "data": []}]} + + @patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_returns_none_for_http(self, mock_load: MagicMock): + result = self.plugin.load_dataset("http://example.com/data") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_returns_none_for_https(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://example.com/data") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_returns_none_for_ftp(self, mock_load: MagicMock): + result = self.plugin.load_dataset("ftp://example.com/data") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_returns_none_for_github(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://github.com/owner/repo/data") + + mock_load.assert_not_called() + assert result is None + + 
@patch("fairspec_dataset.plugins.folder.plugin.load_dataset_from_folder") + def test_returns_none_for_zenodo(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://zenodo.org/record/123") + + mock_load.assert_not_called() + assert result is None + + +class TestSaveDataset: + def setup_method(self): + self.plugin = FolderPlugin() + self.dataset = Dataset(resources=[Resource(name="test", data=[])]) + + @patch("fairspec_dataset.plugins.folder.plugin.os.path.isdir", return_value=True) + @patch("fairspec_dataset.plugins.folder.plugin.save_dataset_to_folder") + def test_saves_to_local_directory( + self, mock_save: MagicMock, _mock_isdir: MagicMock + ): + result = self.plugin.save_dataset(self.dataset, target="/tmp/test") + + mock_save.assert_called_once_with( + self.dataset, folder_path="/tmp/test", with_remote=False + ) + assert result is not None + assert result.path == "/tmp/test" + + @patch("fairspec_dataset.plugins.folder.plugin.os.path.isdir", return_value=True) + @patch("fairspec_dataset.plugins.folder.plugin.save_dataset_to_folder") + def test_saves_with_remote_option( + self, mock_save: MagicMock, _mock_isdir: MagicMock + ): + result = self.plugin.save_dataset( + self.dataset, target="/tmp/test", with_remote=True + ) + + mock_save.assert_called_once_with( + self.dataset, folder_path="/tmp/test", with_remote=True + ) + assert result is not None + assert result.path == "/tmp/test" + + @patch("fairspec_dataset.plugins.folder.plugin.save_dataset_to_folder") + def test_returns_none_for_http(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="http://example.com/data" + ) + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.folder.plugin.save_dataset_to_folder") + def test_returns_none_for_https(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="https://example.com/data" + ) + + mock_save.assert_not_called() + assert result is None 
diff --git a/dataset/fairspec_dataset/plugins/github/__init__.py b/dataset/fairspec_dataset/plugins/github/__init__.py new file mode 100644 index 0000000..0c9d5bb --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/__init__.py @@ -0,0 +1,9 @@ +from .actions.dataset.load import load_dataset_from_github +from .actions.dataset.save import save_dataset_to_github +from .plugin import GithubPlugin + +__all__ = [ + "GithubPlugin", + "load_dataset_from_github", + "save_dataset_to_github", +] diff --git a/dataset/fairspec_dataset/plugins/github/actions/__init__.py b/dataset/fairspec_dataset/plugins/github/actions/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/actions/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/github/actions/dataset/__init__.py b/dataset/fairspec_dataset/plugins/github/actions/dataset/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/actions/dataset/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/github/actions/dataset/from_github.py b/dataset/fairspec_dataset/plugins/github/actions/dataset/from_github.py new file mode 100644 index 0000000..3beffa6 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/actions/dataset/from_github.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + ContributorType, + CreatorNameType, + DateType, + DescriptionType, +) +from fairspec_metadata import Contributor +from fairspec_metadata import Creator +from fairspec_metadata import DataciteDate +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Rights +from fairspec_metadata import Subject +from fairspec_metadata import Title +from fairspec_metadata import Dataset + +from fairspec_dataset.plugins.github.actions.resource.from_github import ( + convert_resource_from_github, 
+) + +if TYPE_CHECKING: + from fairspec_dataset.plugins.github.models.repository import GithubRepository + + +def convert_dataset_from_github(repository: GithubRepository) -> Dataset: + titles = [Title(title=repository.full_name)] if repository.full_name else None + + descriptions = ( + [ + DataciteDescription( + description=repository.description, + descriptionType=DescriptionType.Abstract, + ) + ] + if repository.description + else None + ) + + rights_list = None + license_info = repository.license + if license_info: + rights_list = [ + Rights( + rights=license_info.name or "", + rightsUri=license_info.url, + rightsIdentifier=license_info.spdx_id or license_info.key, + rightsIdentifierScheme="SPDX", + ) + ] + + creators = None + contributors = None + owner = repository.owner + if owner: + if owner.type == "Organization": + contributors = [ + Contributor( + name=owner.login or "", + nameType=CreatorNameType.Organizational, + contributorType=ContributorType.HostingInstitution, + ) + ] + else: + creators = [ + Creator( + name=owner.login or "", + nameType=CreatorNameType.Personal, + ) + ] + + files = repository.files or [] + resource_list = [] + if files: + default_branch = repository.default_branch or "main" + resource_list = [ + convert_resource_from_github(f, default_branch=default_branch) + for f in files + if not (f.path or "").startswith(".") and f.type == "blob" + ] + + topics = repository.topics or [] + subjects = [Subject(subject=topic) for topic in topics] if topics else None + + dates = ( + [DataciteDate(date=repository.created_at, dateType=DateType.Created)] + if repository.created_at + else None + ) + + return Dataset( + titles=titles, + descriptions=descriptions, + rightsList=rights_list, + creators=creators, + contributors=contributors, + resources=resource_list, + subjects=subjects, + dates=dates, + ) diff --git a/dataset/fairspec_dataset/plugins/github/actions/dataset/load.py b/dataset/fairspec_dataset/plugins/github/actions/dataset/load.py new file 
mode 100644 index 0000000..08fa1cc --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/actions/dataset/load.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import urllib.parse +from typing import TYPE_CHECKING + +from fairspec_dataset.actions.dataset.merge import merge_datasets +from fairspec_dataset.plugins.github.models.repository import GithubRepository + +from fairspec_dataset.plugins.github.services.github import make_github_api_request +from .from_github import convert_dataset_from_github + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +def load_dataset_from_github( + repo_url: str, + *, + api_key: str | None = None, +) -> Descriptor: + owner, repo = _extract_repository_info(repo_url) + if not owner or not repo: + raise Exception(f"Failed to extract repository info from URL: {repo_url}") + + repository = make_github_api_request( + endpoint=f"/repos/{owner}/{repo}", + api_key=api_key, + ) + + ref = repository["default_branch"] + tree_response = make_github_api_request( + endpoint=f"/repos/{owner}/{repo}/git/trees/{ref}?recursive=1", + api_key=api_key, + ) + repository["files"] = tree_response["tree"] + + system_dataset = convert_dataset_from_github(GithubRepository(**repository)) + user_dataset_path: str | None = None + for resource in system_dataset.resources or []: + custom = resource.unstable_customMetadata or {} + if custom.get("githubKey") == "dataset.json": + user_dataset_path = custom.get("githubUrl") + break + + dataset = merge_datasets( + system_dataset=system_dataset, + user_dataset_path=user_dataset_path, + ) + + for resource in dataset.resources or []: + resource.unstable_customMetadata = None + + return dataset.model_dump(by_alias=True, exclude_none=True) + + +def _extract_repository_info(repo_url: str) -> tuple[str | None, str | None]: + parsed = urllib.parse.urlparse(repo_url) + parts = [p for p in parsed.path.split("/") if p] + if len(parts) >= 2: + return parts[0], parts[1] + return None, None diff --git 
def save_dataset_to_github(
    dataset: Dataset,
    *,
    api_key: str,
    repo: str,
    org: str | None = None,
) -> dict:
    """Create a GitHub repository and upload the dataset's files into it.

    Args:
        dataset: The dataset to publish.
        api_key: GitHub token with repo-creation permission.
        repo: Name of the repository to create.
        org: Optional organization; defaults to the token's user account.

    Returns:
        Dict with ``path`` (raw URL of the uploaded dataset.json) and
        ``repo_url`` (HTML URL of the created repository).
    """
    basepath = get_dataset_basepath(dataset)

    # Create under the organization when given, otherwise the token's user.
    endpoint = f"/orgs/{org}/repos" if org else "/user/repos"
    github_repository = make_github_api_request(
        endpoint=endpoint,
        payload={"name": repo, "auto_init": True},
        method="POST",
        api_key=api_key,
    )

    owner_login = github_repository["owner"]["login"]

    def _save_file(props: SaveFileProps) -> str:
        """Upload one file via the contents API; returns its repo path."""
        stream = load_file_stream(props.normalized_path)
        # The contents API requires base64-encoded file content.
        content = base64.b64encode(stream.read()).decode()
        make_github_api_request(
            endpoint=f"/repos/{owner_login}/{repo}/contents/{props.denormalized_path}",
            method="PUT",
            payload={
                "path": props.denormalized_path,
                "content": content,
                "message": f'Added file "{props.denormalized_path}"',
            },
            api_key=api_key,
        )
        return props.denormalized_path

    # The callback closes over no per-resource state, so a single instance
    # is shared by all resources (the original rebuilt it each iteration).
    resource_descriptors: list[Descriptor] = []
    for resource in dataset.resources or []:
        resource_descriptors.append(
            save_resource_files(
                resource,
                basepath=basepath,
                with_remote=False,
                save_file=_save_file,
            )
        )

    denormalized = denormalize_dataset(dataset, basepath=basepath)
    descriptor: Descriptor = {
        **denormalized.model_dump(by_alias=True, exclude_none=True),
        "resources": resource_descriptors,
    }

    content = base64.b64encode(stringify_descriptor(descriptor).encode()).decode()

    make_github_api_request(
        endpoint=f"/repos/{owner_login}/{repo}/contents/dataset.json",
        method="PUT",
        payload={
            "path": "dataset.json",
            "message": 'Added file "dataset.json"',
            "content": content,
        },
        api_key=api_key,
    )

    return {
        # NOTE(review): assumes the new repository's default branch is
        # "main" — confirm against accounts configured with another default.
        "path": f"https://raw.githubusercontent.com/{owner_login}/{repo}/refs/heads/main/dataset.json",
        "repo_url": github_repository["html_url"],
    }
"", + ref=default_branch, + file_path=github_file.path or "", + ) + + return Resource( + data=path, + name=get_file_name_slug(path) or github_file.sha or "", + integrity=Integrity( + type=IntegrityType.sha1, + hash=github_file.sha or "", + ), + unstable_customMetadata={ + "githubKey": github_file.path, + "githubUrl": path, + }, + ) + + +def _convert_path(*, url: str, ref: str, file_path: str) -> str: + parsed = urllib.parse.urlparse(url) + parts = parsed.path.split("/")[2:] + if len(parts) >= 2: + owner, repo = parts[0], parts[1] + else: + owner, repo = "", "" + return ( + f"https://raw.githubusercontent.com/{owner}/{repo}/refs/heads/{ref}/{file_path}" + ) diff --git a/dataset/fairspec_dataset/plugins/github/models/__init__.py b/dataset/fairspec_dataset/plugins/github/models/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/models/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/github/models/file.py b/dataset/fairspec_dataset/plugins/github/models/file.py new file mode 100644 index 0000000..7e80c1e --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/models/file.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class GithubFile(FairspecModel): + path: str | None = None + mode: str | None = None + type: str | None = None + size: int | None = None + sha: str | None = None + url: str | None = None diff --git a/dataset/fairspec_dataset/plugins/github/models/license.py b/dataset/fairspec_dataset/plugins/github/models/license.py new file mode 100644 index 0000000..a6dc329 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/models/license.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class GithubLicense(FairspecModel): + key: str | None = None + name: str | None = None + spdx_id: str | None = None + url: str | None = None 
diff --git a/dataset/fairspec_dataset/plugins/github/models/owner.py b/dataset/fairspec_dataset/plugins/github/models/owner.py new file mode 100644 index 0000000..865e378 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/models/owner.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class GithubOwner(FairspecModel): + login: str | None = None + id: int | None = None + avatar_url: str | None = None + html_url: str | None = None + type: str | None = None diff --git a/dataset/fairspec_dataset/plugins/github/models/repository.py b/dataset/fairspec_dataset/plugins/github/models/repository.py new file mode 100644 index 0000000..5d3cad1 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/models/repository.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .file import GithubFile +from .license import GithubLicense +from .owner import GithubOwner + + +class GithubRepository(FairspecModel): + id: int | None = None + name: str | None = None + full_name: str | None = None + owner: GithubOwner | None = None + description: str | None = None + created_at: str | None = None + updated_at: str | None = None + homepage: str | None = None + size: int | None = None + stargazers_count: int | None = None + watchers_count: int | None = None + language: str | None = None + license: GithubLicense | None = None + default_branch: str | None = None + topics: list[str] | None = None + private: bool | None = None + archived: bool | None = None + html_url: str | None = None + git_url: str | None = None + ssh_url: str | None = None + clone_url: str | None = None + files: list[GithubFile] | None = None diff --git a/dataset/fairspec_dataset/plugins/github/plugin.py b/dataset/fairspec_dataset/plugins/github/plugin.py new file mode 100644 index 0000000..9c9d5db --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/plugin.py @@ -0,0 +1,25 
class GithubPlugin(DatasetPlugin):
    """Dataset plugin that loads datasets from GitHub repository URLs."""

    def load_dataset(self, source: str) -> Descriptor | None:
        """Return the dataset for a GitHub URL, or ``None`` when the
        source is not a GitHub URL (so other plugins may handle it)."""
        if not _get_is_github(source):
            return None
        return load_dataset_from_github(source)


def _get_is_github(path: str) -> bool:
    """True when ``path`` is a remote URL hosted on github.com."""
    if not get_is_remote_path(path):
        return False
    hostname = urllib.parse.urlparse(path).hostname
    # Accept the canonical host and its common "www." alias.
    return hostname in ("github.com", "www.github.com")
@patch("fairspec_dataset.plugins.github.plugin.load_dataset_from_github") + def test_returns_none_for_zenodo_urls(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://zenodo.org/records/123456") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.github.plugin.load_dataset_from_github") + def test_handles_github_urls_with_paths(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset("https://github.com/owner/repo/tree/main") + mock_load.assert_called_once() + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.github.plugin.load_dataset_from_github") + def test_handles_github_urls_with_query_params(self, mock_load: MagicMock): + mock_load.return_value = {"resources": []} + result = self.plugin.load_dataset("https://github.com/owner/repo?tab=code") + mock_load.assert_called_once() + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.github.plugin.load_dataset_from_github") + def test_returns_none_for_http_non_github_urls(self, mock_load: MagicMock): + result = self.plugin.load_dataset("http://example.com/data") + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.github.plugin.load_dataset_from_github") + def test_returns_none_for_gitlab_urls(self, mock_load: MagicMock): + result = self.plugin.load_dataset("https://gitlab.com/owner/repo") + mock_load.assert_not_called() + assert result is None diff --git a/dataset/fairspec_dataset/plugins/github/services/__init__.py b/dataset/fairspec_dataset/plugins/github/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/github/services/__init__.py @@ -0,0 +1 @@ + diff --git a/dataset/fairspec_dataset/plugins/github/services/github.py b/dataset/fairspec_dataset/plugins/github/services/github.py new file mode 100644 index 0000000..6c775e8 --- /dev/null +++ 
def make_github_api_request(
    *,
    endpoint: str,
    method: str = "GET",
    payload: Descriptor | None = None,
    api_key: str | None = None,
) -> dict:
    """Perform a GitHub REST API request and return the decoded JSON body.

    Args:
        endpoint: API path starting with "/", e.g. "/repos/owner/repo".
        method: HTTP verb (GET, POST, PUT, ...).
        payload: Optional JSON body; sent whenever it is not None.
        api_key: Optional token sent as a Bearer Authorization header.

    Returns:
        The parsed JSON response.

    Raises:
        urllib.error.HTTPError: For non-2xx responses.
    """
    base_url = "https://api.github.com"
    url = f"{base_url}{endpoint}"

    # GitHub's REST docs recommend pinning the media type explicitly.
    headers: dict[str, str] = {"Accept": "application/vnd.github+json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    data: bytes | None = None
    # "is not None" so an explicitly-empty payload ({}) is still sent
    # (the previous truthiness check silently dropped it).
    if payload is not None:
        headers["Content-Type"] = "application/json"
        data = json.dumps(payload).encode()

    request = urllib.request.Request(url, data=data, headers=headers, method=method)

    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode())
def convert_dataset_from_zenodo(zenodo_record: ZenodoRecord) -> Dataset:
    """Map a Zenodo record onto a fairspec Dataset.

    Metadata fields (title, description, creators, keywords, publication
    date, license, DOI, version) become their DataCite counterparts; each
    record file becomes a resource. Missing metadata yields ``None`` fields.
    """
    meta = zenodo_record.metadata

    titles = None
    descriptions = None
    creators = None
    subjects = None
    dates = None
    rights_list = None
    doi = None
    version = None

    if meta:
        if meta.title:
            titles = [Title(title=meta.title)]
        if meta.description:
            descriptions = [
                DataciteDescription(
                    description=meta.description,
                    descriptionType=DescriptionType.Abstract,
                )
            ]
        if meta.creators:
            creators = []
            for zenodo_creator in meta.creators:
                # Zenodo keeps a single affiliation string per creator.
                affiliation = None
                if zenodo_creator.affiliation:
                    affiliation = [CreatorAffiliation(name=zenodo_creator.affiliation)]
                creators.append(
                    Creator(
                        name=zenodo_creator.name or "",
                        nameType=CreatorNameType.Personal,
                        affiliation=affiliation,
                    )
                )
        if meta.keywords:
            subjects = [Subject(subject=keyword) for keyword in meta.keywords]
        if meta.publication_date:
            dates = [
                DataciteDate(date=meta.publication_date, dateType=DateType.Issued)
            ]
        if meta.license:
            rights_list = [Rights(rights=meta.license)]
        doi = meta.doi
        version = meta.version

    resource_list = [
        convert_resource_from_zenodo(zenodo_file)
        for zenodo_file in zenodo_record.files or []
    ]

    return Dataset(
        titles=titles,
        descriptions=descriptions,
        creators=creators,
        subjects=subjects,
        dates=dates,
        rightsList=rights_list,
        doi=doi,
        version=version,
        resources=resource_list,
    )
custom.get("zenodoKey") == "dataset.json": + user_dataset_path = custom.get("zenodoUrl") + break + + dataset = merge_datasets( + system_dataset=system_dataset, + user_dataset_path=user_dataset_path, + ) + + for resource in dataset.resources or []: + resource.unstable_customMetadata = None + + return dataset.model_dump(by_alias=True, exclude_none=True) + + +def _extract_record_id(dataset_url: str) -> str | None: + parsed = urllib.parse.urlparse(dataset_url) + parts = [p for p in parsed.path.split("/") if p] + return parts[-1] if parts else None diff --git a/dataset/fairspec_dataset/plugins/zenodo/actions/dataset/save.py b/dataset/fairspec_dataset/plugins/zenodo/actions/dataset/save.py new file mode 100644 index 0000000..0866545 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/actions/dataset/save.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import urllib.parse +from typing import TYPE_CHECKING + +from fairspec_metadata import denormalize_dataset, stringify_descriptor + +from fairspec_dataset.actions.dataset.basepath import get_dataset_basepath +from fairspec_dataset.actions.resource.save import ( + SaveFileCallback, + SaveFileProps, + save_resource_files, +) +from fairspec_dataset.actions.stream.load import load_file_stream + +from fairspec_dataset.plugins.zenodo.services.zenodo import make_zenodo_api_request +from .to_zenodo import convert_dataset_to_zenodo + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + from fairspec_metadata import Resource + + +def save_dataset_to_zenodo( + dataset: Dataset, + *, + api_key: str, + sandbox: bool = False, +) -> dict: + basepath = get_dataset_basepath(dataset) + + new_zenodo_record = convert_dataset_to_zenodo(dataset) + zenodo_record = make_zenodo_api_request( + payload=new_zenodo_record, + endpoint="/deposit/depositions", + method="POST", + api_key=api_key, + sandbox=sandbox, + ) + + record_id = zenodo_record["id"] + + resource_descriptors: 
def convert_dataset_to_zenodo(dataset: Dataset) -> dict:
    """Build a Zenodo deposition payload from a fairspec Dataset.

    Returns ``{"metadata": {...}}`` with ``upload_type`` set to "dataset".
    Description and creators fall back to placeholder values when absent,
    since the deposition API requires both.
    """
    meta: dict = {"upload_type": "dataset"}

    first_title = None
    if dataset.titles:
        first_title = dataset.titles[0].title
    if first_title:
        meta["title"] = first_title

    first_description = None
    if dataset.descriptions:
        first_description = dataset.descriptions[0].description
    if first_description:
        meta["description"] = first_description
    elif first_title:
        # Reuse the title rather than send an empty required field.
        meta["description"] = first_title
    else:
        meta["description"] = "Dataset created with fairspec"

    if dataset.version:
        meta["version"] = dataset.version

    if dataset.rightsList and dataset.rightsList[0].rights:
        meta["license"] = dataset.rightsList[0].rights

    if dataset.creators:
        converted = []
        for creator in dataset.creators:
            entry: dict = {"name": creator.name}
            affiliations = creator.affiliation or []
            if affiliations and affiliations[0].name:
                entry["affiliation"] = affiliations[0].name
            converted.append(entry)
        meta["creators"] = converted
    else:
        # The deposition API rejects payloads without creators.
        meta["creators"] = [
            {"name": "Unknown Author", "affiliation": "Unknown Affiliation"}
        ]

    if dataset.subjects:
        meta["keywords"] = [subject.subject for subject in dataset.subjects]

    issued = next(
        (entry for entry in dataset.dates or [] if entry.dateType == "Issued"),
        None,
    )
    if issued:
        meta["publication_date"] = issued.date

    if dataset.doi:
        meta["doi"] = dataset.doi

    return {"metadata": meta}
__future__ import annotations + +import re +from typing import TYPE_CHECKING + +from fairspec_metadata import get_file_name_slug +from fairspec_metadata import Integrity, IntegrityType +from fairspec_metadata import Resource + +if TYPE_CHECKING: + from fairspec_dataset.plugins.zenodo.models.file import ZenodoFile + + +def convert_resource_from_zenodo(zenodo_file: ZenodoFile) -> Resource: + links = zenodo_file.links + path = _convert_path(links.self or "" if links else "") + + return Resource( + data=path, + name=get_file_name_slug(zenodo_file.key or "") or zenodo_file.id or "", + integrity=Integrity( + type=IntegrityType.md5, + hash=(zenodo_file.checksum or "").replace("md5:", ""), + ), + unstable_customMetadata={ + "zenodoKey": zenodo_file.key, + "zenodoUrl": path, + }, + ) + + +def _convert_path(link: str) -> str: + result = link.replace("/api/", "/") + result = re.sub(r"/content$", "", result) + return result diff --git a/dataset/fairspec_dataset/plugins/zenodo/actions/resource/to_zenodo.py b/dataset/fairspec_dataset/plugins/zenodo/actions/resource/to_zenodo.py new file mode 100644 index 0000000..b62749e --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/actions/resource/to_zenodo.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +def convert_resource_to_zenodo(resource: Descriptor) -> dict: + zenodo_file: dict = {} + + if resource.get("name"): + zenodo_file["key"] = resource["name"] + + integrity = resource.get("integrity", {}) + if isinstance(integrity, dict) and integrity.get("type") == "md5": + zenodo_file["checksum"] = f"md5:{integrity['hash']}" + + return zenodo_file diff --git a/dataset/fairspec_dataset/plugins/zenodo/models/__init__.py b/dataset/fairspec_dataset/plugins/zenodo/models/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/__init__.py @@ -0,0 +1 @@ + diff 
--git a/dataset/fairspec_dataset/plugins/zenodo/models/creator.py b/dataset/fairspec_dataset/plugins/zenodo/models/creator.py new file mode 100644 index 0000000..5222f32 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/creator.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class ZenodoCreator(FairspecModel): + name: str | None = None + affiliation: str | None = None + identifiers: list[dict] | None = None diff --git a/dataset/fairspec_dataset/plugins/zenodo/models/file.py b/dataset/fairspec_dataset/plugins/zenodo/models/file.py new file mode 100644 index 0000000..e3f710c --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/file.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class ZenodoFileLinks(FairspecModel): + self: str | None = None + + +class ZenodoFile(FairspecModel): + id: str | None = None + key: str | None = None + size: int | None = None + checksum: str | None = None + links: ZenodoFileLinks | None = None diff --git a/dataset/fairspec_dataset/plugins/zenodo/models/link.py b/dataset/fairspec_dataset/plugins/zenodo/models/link.py new file mode 100644 index 0000000..10c1c94 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/link.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + + +class ZenodoLink(FairspecModel): + self: str | None = None + html: str | None = None + files: str | None = None + bucket: str | None = None + publish: str | None = None + discard: str | None = None + edit: str | None = None diff --git a/dataset/fairspec_dataset/plugins/zenodo/models/metadata.py b/dataset/fairspec_dataset/plugins/zenodo/models/metadata.py new file mode 100644 index 0000000..72b218f --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/metadata.py @@ -0,0 +1,20 @@ +from __future__ import annotations 
+ +from fairspec_metadata.models.base import FairspecModel + +from .creator import ZenodoCreator + + +class ZenodoMetadata(FairspecModel): + title: str | None = None + description: str | None = None + upload_type: str | None = None + publication_date: str | None = None + creators: list[ZenodoCreator] | None = None + access_right: str | None = None + license: str | None = None + doi: str | None = None + keywords: list[str] | None = None + related_identifiers: list[dict] | None = None + communities: list[dict] | None = None + version: str | None = None diff --git a/dataset/fairspec_dataset/plugins/zenodo/models/record.py b/dataset/fairspec_dataset/plugins/zenodo/models/record.py new file mode 100644 index 0000000..b6506ea --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/models/record.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .file import ZenodoFile +from .link import ZenodoLink +from .metadata import ZenodoMetadata + + +class ZenodoRecord(FairspecModel): + id: int | None = None + links: ZenodoLink | None = None + metadata: ZenodoMetadata | None = None + files: list[ZenodoFile] | None = None + state: str | None = None + submitted: bool | None = None diff --git a/dataset/fairspec_dataset/plugins/zenodo/plugin.py b/dataset/fairspec_dataset/plugins/zenodo/plugin.py new file mode 100644 index 0000000..8d2cfca --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zenodo/plugin.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import urllib.parse +from typing import TYPE_CHECKING + +from fairspec_metadata import get_is_remote_path + +from fairspec_dataset.plugin import DatasetPlugin +from .actions.dataset.load import load_dataset_from_zenodo + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +class ZenodoPlugin(DatasetPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + if not _get_is_zenodo(source): + return None + return 
def _get_is_zenodo(path: str) -> bool:
    """True when ``path`` is a remote URL on zenodo.org or a subdomain.

    Matches "zenodo.org" and "*.zenodo.org" (e.g. sandbox.zenodo.org) but
    not unrelated hosts that merely end in the same characters — the
    previous bare ``endswith("zenodo.org")`` also accepted hosts like
    "notzenodo.org".
    """
    if not get_is_remote_path(path):
        return False
    hostname = urllib.parse.urlparse(path).hostname or ""
    return hostname == "zenodo.org" or hostname.endswith(".zenodo.org")
def make_zenodo_api_request(
    *,
    endpoint: str,
    method: str = "GET",
    payload: dict | None = None,
    upload: tuple[str, bytes] | None = None,
    api_key: str | None = None,
    sandbox: bool = False,
) -> dict:
    """Perform a Zenodo REST API request and return the decoded JSON body.

    Args:
        endpoint: API path such as "/records/123" (joined to the base URL).
        method: HTTP verb.
        payload: JSON body for write requests; ignored when ``upload`` is set.
        upload: ``(filename, data)`` pair sent as multipart/form-data.
        api_key: Token passed as an ``access_token`` query parameter.
            NOTE(review): Zenodo also accepts a Bearer Authorization
            header, which avoids leaking the token into server logs —
            consider switching.
        sandbox: Use the sandbox.zenodo.org API instead of production.

    Returns:
        Parsed JSON response, or ``{}`` for an empty response body.
    """
    import uuid

    base_url = "https://sandbox.zenodo.org/api" if sandbox else "https://zenodo.org/api"
    url = f"{base_url}{endpoint}"

    if api_key:
        separator = "&" if "?" in url else "?"
        url = f"{url}{separator}access_token={api_key}"

    headers: dict[str, str] = {}
    data: bytes | None = None

    if upload:
        file_name, file_data = upload
        # Random boundary so arbitrary file bytes cannot collide with it
        # (a fixed boundary corrupts uploads whose content contains it).
        boundary = f"----FormBoundary{uuid.uuid4().hex}"
        headers["Content-Type"] = f"multipart/form-data; boundary={boundary}"

        body = b""
        body += f"--{boundary}\r\n".encode()
        body += (
            f'Content-Disposition: form-data; name="file"; filename="{file_name}"\r\n'
        ).encode()
        body += b"Content-Type: application/octet-stream\r\n\r\n"
        body += file_data
        body += f"\r\n--{boundary}--\r\n".encode()
        data = body
    elif payload is not None:
        headers["Content-Type"] = "application/json"
        data = json.dumps(payload).encode()

    request = urllib.request.Request(url, data=data, headers=headers, method=method)

    with urllib.request.urlopen(request) as response:
        response_data = response.read().decode()
        # DELETE and some file endpoints return an empty body.
        if not response_data:
            return {}
        return json.loads(response_data)
a/dataset/fairspec_dataset/plugins/zip/actions/dataset/load.py b/dataset/fairspec_dataset/plugins/zip/actions/dataset/load.py new file mode 100644 index 0000000..8676829 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/actions/dataset/load.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import os +import zipfile + +from fairspec_metadata import load_dataset_descriptor +from fairspec_metadata import Dataset + +from fairspec_dataset.actions.folder.temp import get_temp_folder_path + + +def load_dataset_from_zip(archive_path: str) -> Dataset: + basepath = get_temp_folder_path() + + with zipfile.ZipFile(archive_path, "r") as zf: + zf.extractall(basepath) + + return load_dataset_descriptor(os.path.join(basepath, "dataset.json")) diff --git a/dataset/fairspec_dataset/plugins/zip/actions/dataset/load_spec.py b/dataset/fairspec_dataset/plugins/zip/actions/dataset/load_spec.py new file mode 100644 index 0000000..c5ffe4d --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/actions/dataset/load_spec.py @@ -0,0 +1,130 @@ +import pytest +from fairspec_metadata import DescriptionType +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Title +from fairspec_metadata import Dataset +from fairspec_metadata import CsvFileDialect +from fairspec_metadata import Resource +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TableSchema + +from fairspec_dataset.actions.file.temp import get_temp_file_path, write_temp_file +from .load import load_dataset_from_zip +from .save import save_dataset_to_zip + + +class TestLoadDatasetFromZip: + def test_loads_basic_dataset_from_zip(self): + path = get_temp_file_path(format="zip") + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert len(result.resources) 
== 1 + + def test_loads_dataset_with_metadata(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + titles=[Title(title="Test Dataset")], + descriptions=[ + DataciteDescription( + description="A test", descriptionType=DescriptionType.Abstract + ) + ], + version="1.0", + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.titles is not None + assert result.titles[0].title == "Test Dataset" + assert result.descriptions is not None + assert result.descriptions[0].description == "A test" + assert result.version == "1.0" + + def test_loads_dataset_with_inline_data_resources(self): + path = get_temp_file_path(format="zip") + data = [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}] + dataset = Dataset(resources=[Resource(name="test_res", data=data)]) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert result.resources[0].data == data + + def test_loads_dataset_with_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(), + ) + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert result.resources[0].fileDialect is not None + + def test_loads_dataset_with_table_schema(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=[{"id": 1}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert 
isinstance(result.resources[0].tableSchema, TableSchema) + assert result.resources[0].tableSchema.properties is not None + assert len(result.resources[0].tableSchema.properties) == 2 + + def test_loads_dataset_with_multiple_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource(name="file_res", data=csv_path), + Resource(name="inline_res", data=[{"id": 1}]), + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert len(result.resources) == 2 + assert result.resources[0].name == "file_res" + assert result.resources[1].name == "inline_res" + + def test_throws_error_for_non_existent_zip_file(self): + with pytest.raises(Exception): + load_dataset_from_zip("/non/existent/path.zip") + + def test_throws_error_for_invalid_zip_file(self): + path = write_temp_file("not a zip file", format="zip") + with pytest.raises(Exception): + load_dataset_from_zip(path) diff --git a/dataset/fairspec_dataset/plugins/zip/actions/dataset/save.py b/dataset/fairspec_dataset/plugins/zip/actions/dataset/save.py new file mode 100644 index 0000000..6b2649a --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/actions/dataset/save.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import zipfile +from typing import TYPE_CHECKING + +from fairspec_metadata import denormalize_dataset +from fairspec_metadata import stringify_descriptor + +from fairspec_dataset.actions.dataset.basepath import get_dataset_basepath +from fairspec_dataset.actions.file.path import assert_local_path_vacant +from fairspec_dataset.actions.resource.save import SaveFileProps, save_resource_files +from fairspec_dataset.actions.stream.load import load_file_stream + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + + +def save_dataset_to_zip( + dataset: Dataset, + 
*, + archive_path: str, + with_remote: bool = False, +) -> None: + basepath = get_dataset_basepath(dataset) + + assert_local_path_vacant(archive_path) + files: dict[str, bytes] = {} + + resource_descriptors: list[Descriptor] = [] + for resource in dataset.resources or []: + resource_descriptors.append( + save_resource_files( + resource, + basepath=basepath, + with_remote=with_remote, + save_file=_make_save_file(files), + ) + ) + + denormalized = denormalize_dataset(dataset, basepath=basepath) + descriptor: Descriptor = { + **denormalized.model_dump(by_alias=True, exclude_none=True), + "resources": resource_descriptors, + } + + files["dataset.json"] = stringify_descriptor(descriptor).encode("utf-8") + + with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as zf: + for name, data in files.items(): + zf.writestr(name, data) + + +def _make_save_file(files: dict[str, bytes]): + def save_file(props: SaveFileProps) -> str: + stream = load_file_stream(props.normalized_path) + files[props.denormalized_path] = stream.read() + return props.denormalized_path + + return save_file diff --git a/dataset/fairspec_dataset/plugins/zip/actions/dataset/save_spec.py b/dataset/fairspec_dataset/plugins/zip/actions/dataset/save_spec.py new file mode 100644 index 0000000..d246e47 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/actions/dataset/save_spec.py @@ -0,0 +1,237 @@ +import os + +import pytest +from fairspec_metadata import DescriptionType +from fairspec_metadata import DataciteDescription +from fairspec_metadata import Subject +from fairspec_metadata import Title +from fairspec_metadata import Dataset +from fairspec_metadata import CsvFileDialect +from fairspec_metadata import Resource +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TableSchema + +from fairspec_dataset.actions.file.temp import get_temp_file_path, write_temp_file +from .load import load_dataset_from_zip +from 
.save import save_dataset_to_zip + + +class TestSaveDatasetToZip: + def test_saves_basic_dataset_to_zip(self): + path = get_temp_file_path(format="zip") + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + assert os.path.getsize(path) > 0 + + def test_saves_dataset_with_metadata(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + titles=[Title(title="Test Dataset")], + descriptions=[ + DataciteDescription( + description="A test", descriptionType=DescriptionType.Abstract + ) + ], + version="1.0", + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + assert os.path.getsize(path) > 0 + + def test_saves_dataset_with_inline_data_resources(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[Resource(name="test_res", data=[{"id": 1}, {"id": 2}])] + ) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + + def test_saves_dataset_with_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset(resources=[Resource(name="test_res", data=csv_path)]) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + + def test_saves_dataset_with_multiple_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource(name="file_res", data=csv_path), + Resource(name="inline_res", data=[{"id": 1}]), + ] + ) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + + def test_saves_dataset_with_table_schema(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=[{"id": 1}], + 
tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + + def test_saves_dataset_with_dialect(self): + csv_path = write_temp_file("id;name\n1;alice\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(delimiter=";"), + ) + ] + ) + + save_dataset_to_zip(dataset, archive_path=path) + + assert os.path.exists(path) + + def test_roundtrip_preserves_structure(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + titles=[Title(title="My Dataset")], + descriptions=[ + DataciteDescription( + description="Desc", descriptionType=DescriptionType.Abstract + ) + ], + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.titles is not None + assert result.titles[0].title == "My Dataset" + assert result.descriptions is not None + assert result.descriptions[0].description == "Desc" + assert result.resources is not None + assert len(result.resources) == 1 + + def test_roundtrip_preserves_metadata(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + titles=[Title(title="My Dataset")], + version="2.0", + subjects=[Subject(subject="science")], + resources=[Resource(name="test_res", data=[{"id": 1}])], + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.titles is not None + assert result.titles[0].title == "My Dataset" + assert result.version == "2.0" + assert result.subjects is not None + assert result.subjects[0].subject == "science" + + def test_roundtrip_preserves_table_schema(self): + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + 
data=[{"id": 1}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert isinstance(result.resources[0].tableSchema, TableSchema) + assert result.resources[0].tableSchema.properties is not None + assert len(result.resources[0].tableSchema.properties) == 2 + + def test_roundtrip_preserves_file_resources(self): + csv_path = write_temp_file("id,name\n1,alice\n2,bob\n", format="csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource( + name="test_res", + data=csv_path, + fileDialect=CsvFileDialect(), + ) + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert result.resources[0].fileDialect is not None + + def test_throws_error_for_existing_file(self): + path = write_temp_file("existing content", format="zip") + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + + with pytest.raises(FileExistsError): + save_dataset_to_zip(dataset, archive_path=path) + + def test_creates_valid_zip_structure(self): + path = get_temp_file_path(format="zip") + dataset = Dataset(resources=[Resource(name="test_res", data=[{"id": 1}])]) + save_dataset_to_zip(dataset, archive_path=path) + + result = load_dataset_from_zip(path) + + assert result.resources is not None + assert result.resources[0].name == "test_res" + + def test_roundtrip_with_multiple_file_resources(self): + csv1 = write_temp_file("id,name\n1,alice\n", filename="data1.csv") + csv2 = write_temp_file("id,name\n2,bob\n", filename="data2.csv") + path = get_temp_file_path(format="zip") + dataset = Dataset( + resources=[ + Resource(name="first_res", data=csv1), + Resource(name="second_res", data=csv2), + ] + ) + save_dataset_to_zip(dataset, archive_path=path) + + result 
= load_dataset_from_zip(path) + + assert result.resources is not None + assert len(result.resources) == 2 + assert result.resources[0].name == "first_res" + assert result.resources[1].name == "second_res" diff --git a/dataset/fairspec_dataset/plugins/zip/plugin.py b/dataset/fairspec_dataset/plugins/zip/plugin.py new file mode 100644 index 0000000..2ea2880 --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/plugin.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import get_file_extension + +from fairspec_dataset.models.dataset import SaveDatasetResult +from fairspec_dataset.plugin import DatasetPlugin +from .actions.dataset.load import load_dataset_from_zip +from .actions.dataset.save import save_dataset_to_zip + +if TYPE_CHECKING: + from fairspec_metadata import Dataset + from fairspec_metadata import Descriptor + + from fairspec_dataset.models.dataset import SaveDatasetOptions + + +class ZipPlugin(DatasetPlugin): + def load_dataset(self, source: str) -> Descriptor | None: + if not _get_is_zip(source): + return None + dataset = load_dataset_from_zip(source) + return dataset.model_dump(by_alias=True, exclude_none=True) + + def save_dataset( + self, dataset: Dataset, **options: Unpack[SaveDatasetOptions] + ) -> SaveDatasetResult | None: + target = options["target"] + if not _get_is_zip(target): + return None + save_dataset_to_zip( + dataset, archive_path=target, with_remote=bool(options.get("with_remote")) + ) + return SaveDatasetResult(path=None) + + +def _get_is_zip(path: str) -> bool: + return get_file_extension(path) == "zip" diff --git a/dataset/fairspec_dataset/plugins/zip/plugin_spec.py b/dataset/fairspec_dataset/plugins/zip/plugin_spec.py new file mode 100644 index 0000000..ad5577f --- /dev/null +++ b/dataset/fairspec_dataset/plugins/zip/plugin_spec.py @@ -0,0 +1,114 @@ +from unittest.mock import MagicMock, patch + +from fairspec_metadata import Dataset +from fairspec_metadata 
import Resource + +from .plugin import ZipPlugin + + +class TestLoadDataset: + def setup_method(self): + self.plugin = ZipPlugin() + + @patch("fairspec_dataset.plugins.zip.plugin.load_dataset_from_zip") + def test_loads_from_zip_file(self, mock_load: MagicMock): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = { + "resources": [{"name": "test-resource", "data": []}] + } + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset("test.zip") + + mock_load.assert_called_once_with("test.zip") + assert result == {"resources": [{"name": "test-resource", "data": []}]} + + @patch("fairspec_dataset.plugins.zip.plugin.load_dataset_from_zip") + def test_returns_none_for_non_zip(self, mock_load: MagicMock): + result = self.plugin.load_dataset("test.json") + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.zip.plugin.load_dataset_from_zip") + def test_handles_paths_with_directories(self, mock_load: MagicMock): + mock_dataset = MagicMock() + mock_dataset.model_dump.return_value = {"resources": []} + mock_load.return_value = mock_dataset + + result = self.plugin.load_dataset("/path/to/file.zip") + + mock_load.assert_called_once_with("/path/to/file.zip") + assert result == {"resources": []} + + @patch("fairspec_dataset.plugins.zip.plugin.load_dataset_from_zip") + def test_returns_none_for_no_extension(self, mock_load: MagicMock): + result = self.plugin.load_dataset("test") + + mock_load.assert_not_called() + assert result is None + + +class TestSaveDataset: + def setup_method(self): + self.plugin = ZipPlugin() + self.dataset = Dataset(resources=[Resource(name="test_resource", data=[])]) + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_saves_to_zip_file(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="output.zip") + + mock_save.assert_called_once_with( + self.dataset, archive_path="output.zip", with_remote=False + ) + assert 
result is not None + assert result.path is None + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_returns_none_for_non_zip(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="output.json") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_passes_with_remote_option(self, mock_save: MagicMock): + result = self.plugin.save_dataset( + self.dataset, target="output.zip", with_remote=True + ) + + mock_save.assert_called_once_with( + self.dataset, archive_path="output.zip", with_remote=True + ) + assert result is not None + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_with_remote_defaults_to_false(self, mock_save: MagicMock): + self.plugin.save_dataset(self.dataset, target="output.zip") + + mock_save.assert_called_once_with( + self.dataset, archive_path="output.zip", with_remote=False + ) + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_handles_paths_with_directories(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="/path/to/output.zip") + + mock_save.assert_called_once_with( + self.dataset, archive_path="/path/to/output.zip", with_remote=False + ) + assert result is not None + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_returns_none_for_no_extension(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_dataset.plugins.zip.plugin.save_dataset_to_zip") + def test_result_has_path_none(self, mock_save: MagicMock): + result = self.plugin.save_dataset(self.dataset, target="output.zip") + + assert result is not None + assert result.path is None diff --git a/dataset/fairspec_dataset/py.typed b/dataset/fairspec_dataset/py.typed new file mode 100644 index 0000000..e69de29 
diff --git a/dataset/pyproject.toml b/dataset/pyproject.toml new file mode 100644 index 0000000..b905b17 --- /dev/null +++ b/dataset/pyproject.toml @@ -0,0 +1,40 @@ +[project] +name = "fairspec-dataset" +version = "0.0.0-dev" +description = "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +keywords = [ + "ckan", + "data", + "dataframe", + "datahub", + "dataset", + "fair", + "fairspec", + "jsonschema", + "metadata", + "polars", + "python", + "quality", + "tableschema", + "validation" +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = ["fairspec-metadata", "charset-normalizer>=3.0"] + +[[project.authors]] +name = "Evgeny Karev" + +[project.urls] +homepage = "https://github.com/fairspec/fairspec-python" +repository = "https://github.com/fairspec/fairspec-python" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/fairspec/README.md b/fairspec/README.md new file mode 100644 index 0000000..ee38672 --- /dev/null +++ b/fairspec/README.md @@ -0,0 +1,3 @@ +# fairspec + +Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). 
diff --git a/fairspec/fairspec/__init__.py b/fairspec/fairspec/__init__.py new file mode 100644 index 0000000..126517b --- /dev/null +++ b/fairspec/fairspec/__init__.py @@ -0,0 +1 @@ +from fairspec_library import * # noqa: F403 diff --git a/fairspec/fairspec/main.py b/fairspec/fairspec/main.py new file mode 100644 index 0000000..1324595 --- /dev/null +++ b/fairspec/fairspec/main.py @@ -0,0 +1,6 @@ +from fairspec_terminal.program import program, register_commands + + +def main() -> None: + register_commands() + program() diff --git a/fairspec/pyproject.toml b/fairspec/pyproject.toml new file mode 100644 index 0000000..e983931 --- /dev/null +++ b/fairspec/pyproject.toml @@ -0,0 +1,42 @@ +[project] +name = "fairspec" +version = "0.0.0-dev" +description = "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +keywords = [ + "data", + "dataframe", + "fair", + "fairspec", + "jsonschema", + "polars", + "python", + "quality", + "tableschema", + "validation", +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "fairspec-library", + "fairspec-terminal", +] + +[[project.authors]] +name = "Evgeny Karev" + +[project.urls] +homepage = "https://github.com/fairspec/fairspec-python" +repository = "https://github.com/fairspec/fairspec-python" + +[project.scripts] +fairspec = "fairspec.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/library/README.md b/library/README.md new file mode 100644 index 0000000..c6f9f79 --- /dev/null +++ b/library/README.md @@ -0,0 +1,3 @@ +# fairspec-library + +Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. 
It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). diff --git a/library/fairspec_library/__init__.py b/library/fairspec_library/__init__.py new file mode 100644 index 0000000..583808a --- /dev/null +++ b/library/fairspec_library/__init__.py @@ -0,0 +1,29 @@ +from fairspec_dataset import * # noqa: F403 +from fairspec_metadata import * # noqa: F403 +from fairspec_table import * # noqa: F403 + +from .actions.data.load import load_data as load_data +from .actions.data.validate import validate_data as validate_data +from .actions.data_schema.infer import infer_data_schema as infer_data_schema +from .actions.data_schema.render import render_data_schema_as as render_data_schema_as +from .actions.dataset.foreign_key import ( + validate_dataset_foreign_keys as validate_dataset_foreign_keys, +) +from .actions.dataset.infer import infer_dataset as infer_dataset +from .actions.dataset.load import load_dataset as load_dataset +from .actions.dataset.render import render_dataset_as as render_dataset_as +from .actions.dataset.save import save_dataset as save_dataset +from .actions.dataset.validate import validate_dataset as validate_dataset +from .actions.file_dialect.infer import infer_file_dialect as infer_file_dialect +from .actions.resource.infer import infer_resource as infer_resource +from .actions.resource.validate import validate_resource as validate_resource +from .actions.table.infer import infer_table as infer_table +from .actions.table.load import load_table as load_table +from .actions.table.save import save_table as save_table +from .actions.table.validate import validate_table as validate_table +from .actions.table_schema.infer import infer_table_schema as infer_table_schema +from .actions.table_schema.render import render_table_schema_as as render_table_schema_as +from .models.table import 
ValidateTableOptions as ValidateTableOptions +from .plugin import Plugin as Plugin +from .system import System as System +from .system import system as system diff --git a/library/fairspec_library/actions/__init__.py b/library/fairspec_library/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/data/__init__.py b/library/fairspec_library/actions/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/data/load.py b/library/fairspec_library/actions/data/load.py new file mode 100644 index 0000000..55bce3b --- /dev/null +++ b/library/fairspec_library/actions/data/load.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + get_data_first_path, + get_data_value, + get_supported_file_dialect, + load_descriptor, +) + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def load_data(resource: Resource) -> object | None: + data_value = get_data_value(resource) + if data_value: + return data_value + + first_path = get_data_first_path(resource) + if first_path: + dialect = get_supported_file_dialect(resource, ["json"]) + if dialect: + return load_descriptor(first_path) + + return None diff --git a/library/fairspec_library/actions/data/load_spec.py b/library/fairspec_library/actions/data/load_spec.py new file mode 100644 index 0000000..9eb5e52 --- /dev/null +++ b/library/fairspec_library/actions/data/load_spec.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import json + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .load import load_data + + +class TestLoadData: + def test_should_return_inline_data(self): + resource = Resource(data=[{"id": 1}, {"id": 2}]) + result = load_data(resource) + assert result == [{"id": 1}, {"id": 2}] + + def test_should_return_inline_object(self): + resource = Resource(data={"key": "value"}) + 
result = load_data(resource) + assert result == {"key": "value"} + + def test_should_load_json_file(self): + data = {"key": "value"} + path = write_temp_file(json.dumps(data), format="json") + resource = Resource(data=path) + result = load_data(resource) + assert result == data + + def test_should_return_none_for_non_json_file(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + result = load_data(resource) + assert result is None + + def test_should_return_none_for_empty_resource(self): + resource = Resource() + result = load_data(resource) + assert result is None diff --git a/library/fairspec_library/actions/data/validate.py b/library/fairspec_library/actions/data/validate.py new file mode 100644 index 0000000..70e6d0f --- /dev/null +++ b/library/fairspec_library/actions/data/validate.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import ( + DataError, + FairspecError, + Report, + create_report, + inspect_json, + resolve_data_schema, +) + +from .load import load_data + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def validate_data(resource: Resource) -> Report: + errors: list[FairspecError] = [] + + data_schema = resolve_data_schema(resource.dataSchema) + if not data_schema: + return create_report() + + data = load_data(resource) + if data is None: + return create_report() + + notes = inspect_json(data, json_schema=data_schema) + for note in notes: + errors.append( + DataError( + type="data", + message=note["message"], + jsonPointer=note["jsonPointer"], + ) + ) + + return create_report(errors) diff --git a/library/fairspec_library/actions/data/validate_spec.py b/library/fairspec_library/actions/data/validate_spec.py new file mode 100644 index 0000000..fdb3f25 --- /dev/null +++ b/library/fairspec_library/actions/data/validate_spec.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from fairspec_metadata import 
Resource + +from .validate import validate_data + + +class TestValidateData: + def test_should_return_valid_when_no_schema(self): + resource = Resource(data=[{"id": 1}]) + report = validate_data(resource) + assert report.valid is True + + def test_should_validate_inline_data(self): + resource = Resource( + data={"name": "test", "age": 25}, + dataSchema={ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + }, + ) + report = validate_data(resource) + assert report.valid is True + + def test_should_detect_invalid_data(self): + resource = Resource( + data={"name": 123}, + dataSchema={ + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + ) + report = validate_data(resource) + assert report.valid is False + assert len(report.errors) > 0 + + def test_should_return_valid_for_no_data(self): + resource = Resource( + dataSchema={"type": "object"}, + ) + report = validate_data(resource) + assert report.valid is True diff --git a/library/fairspec_library/actions/data_schema/__init__.py b/library/fairspec_library/actions/data_schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/data_schema/infer.py b/library/fairspec_library/actions/data_schema/infer.py new file mode 100644 index 0000000..9032dbe --- /dev/null +++ b/library/fairspec_library/actions/data_schema/infer.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from genson import SchemaBuilder + +from fairspec_library.actions.data.load import load_data + +if TYPE_CHECKING: + from fairspec_metadata import JsonSchema, Resource + + +def infer_data_schema(resource: Resource) -> JsonSchema | None: + data = load_data(resource) + if not data: + return None + + try: + builder = SchemaBuilder() + builder.add_object(data) + return builder.to_schema() + except Exception: + return None diff --git 
a/library/fairspec_library/actions/data_schema/infer_spec.py b/library/fairspec_library/actions/data_schema/infer_spec.py new file mode 100644 index 0000000..f86feca --- /dev/null +++ b/library/fairspec_library/actions/data_schema/infer_spec.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import json + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .infer import infer_data_schema + + +class TestInferDataSchema: + def test_should_infer_schema_from_inline_data(self): + resource = Resource(data={"name": "test", "age": 25}) + schema = infer_data_schema(resource) + assert schema is not None + assert schema.get("type") == "object" + + def test_should_infer_schema_from_inline_array(self): + resource = Resource(data=[{"id": 1}, {"id": 2}]) + schema = infer_data_schema(resource) + assert schema is not None + + def test_should_infer_schema_from_json_file(self): + data = {"name": "test", "value": 42} + path = write_temp_file(json.dumps(data), format="json") + resource = Resource(data=path) + schema = infer_data_schema(resource) + assert schema is not None + assert schema.get("type") == "object" + + def test_should_return_none_for_no_data(self): + resource = Resource() + schema = infer_data_schema(resource) + assert schema is None + + def test_should_return_none_for_csv_file(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + schema = infer_data_schema(resource) + assert schema is None diff --git a/library/fairspec_library/actions/data_schema/render.py b/library/fairspec_library/actions/data_schema/render.py new file mode 100644 index 0000000..c7b9661 --- /dev/null +++ b/library/fairspec_library/actions/data_schema/render.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import DataSchema, RenderDataSchemaOptions + + +def 
render_data_schema_as( + data_schema: DataSchema, options: RenderDataSchemaOptions +) -> str | None: + for plugin in system.plugins: + result = plugin.render_data_schema_as(data_schema, options) + if result is not None: + return result + + return None diff --git a/library/fairspec_library/actions/dataset/__init__.py b/library/fairspec_library/actions/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/dataset/foreign_key.py b/library/fairspec_library/actions/dataset/foreign_key.py new file mode 100644 index 0000000..e2434d9 --- /dev/null +++ b/library/fairspec_library/actions/dataset/foreign_key.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +import polars as pl +from fairspec_metadata import ( + FairspecError, + ForeignKey, + ForeignKeyError, + Report, + create_report, + resolve_table_schema, +) + +from fairspec_library.actions.table.load import load_table +from fairspec_library.models.table import ValidateTableOptions + +if TYPE_CHECKING: + from fairspec_metadata import Dataset, Resource + + +def validate_dataset_foreign_keys( + dataset: Dataset, **options: Unpack[ValidateTableOptions] +) -> Report: + errors: list[FairspecError] = [] + + for resource in dataset.resources or []: + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + continue + + for foreign_key in table_schema.foreignKeys or []: + fk_errors = _validate_foreign_key( + resource, foreign_key, dataset, **options + ) + errors.extend(fk_errors) + + return create_report(errors) + + +def _validate_foreign_key( + resource: Resource, + foreign_key: ForeignKey, + dataset: Dataset, + **options: Unpack[ValidateTableOptions], +) -> list[ForeignKeyError]: + reference = foreign_key.reference + columns = foreign_key.columns + ref_columns = reference.columns + + if not columns or not ref_columns or len(columns) != len(ref_columns): + return [] + + ref_resource = 
_find_resource(dataset, reference.resource) + if not ref_resource: + return [] + + table = load_table(resource, denormalized=True, **options) + if table is None: + return [] + + ref_table = load_table(ref_resource, denormalized=True, **options) + if ref_table is None: + return [] + + rename_mapping = dict(zip(ref_columns, columns)) + ref_selected = ref_table.select( + [pl.col(name).alias(rename_mapping[name]) for name in ref_columns] + ).unique() + + violations: pl.DataFrame = table.select(columns).join( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + ref_selected, on=columns, how="anti" + ).unique().collect() + + errors: list[ForeignKeyError] = [] + for row in violations.to_dicts(): + cells = [str(row[c]) for c in columns] + errors.append( + ForeignKeyError( + type="foreignKey", + resourceName=resource.name, + foreignKey=foreign_key, + cells=cells, + ) + ) + + return errors + + +def _find_resource(dataset: Dataset, name: str | None) -> Resource | None: + if not name: + return None + + for resource in dataset.resources or []: + if resource.name == name: + return resource + + return None diff --git a/library/fairspec_library/actions/dataset/foreign_key_spec.py b/library/fairspec_library/actions/dataset/foreign_key_spec.py new file mode 100644 index 0000000..09c0a2d --- /dev/null +++ b/library/fairspec_library/actions/dataset/foreign_key_spec.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import ( + Dataset, + ForeignKey, + ForeignKeyReference, + IntegerColumnProperty, + Resource, + StringColumnProperty, + TableSchema, +) + +from .foreign_key import validate_dataset_foreign_keys + + +class TestValidateDatasetForeignKeys: + def test_should_validate_valid_foreign_keys(self): + path1 = write_temp_file("id,name\n1,english\n2,中文", format="csv") + path2 = write_temp_file("id,name_id\n1,1\n2,2", format="csv") + dataset = Dataset( + resources=[ + Resource( + 
data=path1, + name="names", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ), + Resource( + data=path2, + name="refs", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name_id": IntegerColumnProperty(), + }, + foreignKeys=[ + ForeignKey( + columns=["name_id"], + reference=ForeignKeyReference( + resource="names", + columns=["id"], + ), + ) + ], + ), + ), + ] + ) + report = validate_dataset_foreign_keys(dataset) + assert report.valid is True + + def test_should_detect_foreign_key_violation(self): + path1 = write_temp_file("id,name\n1,english\n2,中文", format="csv") + path2 = write_temp_file("id,name_id\n1,1\n2,999", format="csv") + dataset = Dataset( + resources=[ + Resource( + data=path1, + name="names", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ), + Resource( + data=path2, + name="refs", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name_id": IntegerColumnProperty(), + }, + foreignKeys=[ + ForeignKey( + columns=["name_id"], + reference=ForeignKeyReference( + resource="names", + columns=["id"], + ), + ) + ], + ), + ), + ] + ) + report = validate_dataset_foreign_keys(dataset) + assert report.valid is False + assert len(report.errors) > 0 + + def test_should_handle_no_foreign_keys(self): + path = write_temp_file("id,name\n1,english", format="csv") + dataset = Dataset( + resources=[ + Resource( + data=path, + name="test", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + report = validate_dataset_foreign_keys(dataset) + assert report.valid is True diff --git a/library/fairspec_library/actions/dataset/infer.py b/library/fairspec_library/actions/dataset/infer.py new file mode 100644 index 0000000..04eafcf --- /dev/null +++ b/library/fairspec_library/actions/dataset/infer.py @@ -0,0 
+1,17 @@ +from __future__ import annotations + +from fairspec_metadata import Dataset + +from fairspec_library.actions.resource.infer import infer_resource + + +def infer_dataset(dataset: Dataset) -> Dataset: + dataset = dataset.model_copy(deep=True) + + if dataset.resources: + for index, resource in enumerate(dataset.resources): + dataset.resources[index] = infer_resource( + resource, resource_number=index + 1 + ) + + return dataset diff --git a/library/fairspec_library/actions/dataset/infer_spec.py b/library/fairspec_library/actions/dataset/infer_spec.py new file mode 100644 index 0000000..7cdfcca --- /dev/null +++ b/library/fairspec_library/actions/dataset/infer_spec.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Dataset, Resource + +from .infer import infer_dataset + + +class TestInferDataset: + def test_should_infer_resource_names(self): + path = write_temp_file("id,name\n1,english", format="csv") + dataset = Dataset(resources=[Resource(data=path)]) + result = infer_dataset(dataset) + assert result.resources is not None + assert len(result.resources) == 1 + assert result.resources[0].name is not None + + def test_should_not_mutate_original(self): + path = write_temp_file("id,name\n1,english", format="csv") + dataset = Dataset(resources=[Resource(data=path)]) + result = infer_dataset(dataset) + assert result is not dataset + + def test_should_handle_empty_resources(self): + dataset = Dataset(resources=[]) + result = infer_dataset(dataset) + assert result.resources == [] + + def test_should_handle_no_resources(self): + dataset = Dataset() + result = infer_dataset(dataset) + assert result.resources is None diff --git a/library/fairspec_library/actions/dataset/load.py b/library/fairspec_library/actions/dataset/load.py new file mode 100644 index 0000000..2bd2983 --- /dev/null +++ b/library/fairspec_library/actions/dataset/load.py @@ -0,0 +1,20 @@ +from __future__ import 
annotations + +from typing import TYPE_CHECKING + +from fairspec_dataset import DatasetPlugin + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import Descriptor + + +def load_dataset(source: str) -> Descriptor | None: + for plugin in system.plugins: + if isinstance(plugin, DatasetPlugin): + result = plugin.load_dataset(source) + if result is not None: + return result + + return None diff --git a/library/fairspec_library/actions/dataset/load_spec.py b/library/fairspec_library/actions/dataset/load_spec.py new file mode 100644 index 0000000..f30a7ce --- /dev/null +++ b/library/fairspec_library/actions/dataset/load_spec.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import json +import os + +from fairspec_dataset import get_temp_folder_path + +from .load import load_dataset + + +class TestLoadDataset: + def test_should_load_dataset_from_descriptor_path(self): + folder = get_temp_folder_path() + descriptor = { + "resources": [{"data": "data.csv", "name": "data"}] + } + path = os.path.join(folder, "datapackage.json") + with open(path, "w") as f: + json.dump(descriptor, f) + result = load_dataset(path) + assert result is not None + assert "resources" in result + + def test_should_return_none_for_unsupported_source(self): + result = load_dataset("nonexistent-source") + assert result is None diff --git a/library/fairspec_library/actions/dataset/render.py b/library/fairspec_library/actions/dataset/render.py new file mode 100644 index 0000000..8bbb283 --- /dev/null +++ b/library/fairspec_library/actions/dataset/render.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import Dataset, RenderDatasetOptions + + +def render_dataset_as(dataset: Dataset, options: RenderDatasetOptions) -> str | None: + for plugin in system.plugins: + result = plugin.render_dataset_as(dataset, options) + if result 
is not None: + return result + + return None diff --git a/library/fairspec_library/actions/dataset/save.py b/library/fairspec_library/actions/dataset/save.py new file mode 100644 index 0000000..a763950 --- /dev/null +++ b/library/fairspec_library/actions/dataset/save.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import Unpack + +from fairspec_dataset import DatasetPlugin, SaveDatasetOptions, SaveDatasetResult +from fairspec_metadata import Dataset + +from fairspec_library.system import system + + +def save_dataset( + dataset: Dataset, **options: Unpack[SaveDatasetOptions] +) -> SaveDatasetResult | None: + for plugin in system.plugins: + if isinstance(plugin, DatasetPlugin): + result = plugin.save_dataset(dataset, **options) + if result is not None: + return result + + return None diff --git a/library/fairspec_library/actions/dataset/save_spec.py b/library/fairspec_library/actions/dataset/save_spec.py new file mode 100644 index 0000000..21abc26 --- /dev/null +++ b/library/fairspec_library/actions/dataset/save_spec.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import Dataset, Resource + +from .save import save_dataset + + +class TestSaveDataset: + def test_should_save_dataset_to_zip(self): + path = get_temp_file_path(format="zip") + dataset = Dataset(resources=[Resource(data=[{"id": 1}], name="data")]) + result = save_dataset(dataset, target=path) + assert result is not None + + def test_should_return_none_for_unsupported_target(self): + dataset = Dataset(resources=[Resource(data=[{"id": 1}], name="data")]) + result = save_dataset(dataset, target="/tmp/unknown.xyz") + assert result is None diff --git a/library/fairspec_library/actions/dataset/validate.py b/library/fairspec_library/actions/dataset/validate.py new file mode 100644 index 0000000..9a97020 --- /dev/null +++ b/library/fairspec_library/actions/dataset/validate.py @@ -0,0 +1,57 @@ +from __future__ 
import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import ( + Dataset, + FairspecException, + Report, + create_report, + infer_resource_name, + load_dataset_descriptor, + normalize_dataset, +) + +from fairspec_library.actions.dataset.foreign_key import validate_dataset_foreign_keys +from fairspec_library.actions.resource.validate import validate_resource +from fairspec_library.models.table import ValidateTableOptions + +if TYPE_CHECKING: + from fairspec_metadata import FairspecError + + +def validate_dataset( + source: Dataset | str, **options: Unpack[ValidateTableOptions] +) -> Report: + if isinstance(source, str): + try: + descriptor = load_dataset_descriptor(source) + source = Dataset.model_validate(descriptor) + except FairspecException as exception: + if exception.report: + return exception.report + return create_report() + + dataset = normalize_dataset(source) + errors = _validate_dataset_resources(dataset, **options) + fk_report = validate_dataset_foreign_keys(dataset, **options) + errors.extend(fk_report.errors) + + return create_report(errors) + + +def _validate_dataset_resources( + dataset: Dataset, **options: Unpack[ValidateTableOptions] +) -> list[FairspecError]: + errors: list[FairspecError] = [] + + for index, resource in enumerate(dataset.resources or []): + if not resource.name: + resource.name = infer_resource_name(resource, resource_number=index + 1) + + report = validate_resource(resource, **options) + for error in report.errors: + error.resourceName = resource.name + errors.extend(report.errors) + + return errors diff --git a/library/fairspec_library/actions/dataset/validate_spec.py b/library/fairspec_library/actions/dataset/validate_spec.py new file mode 100644 index 0000000..15804b1 --- /dev/null +++ b/library/fairspec_library/actions/dataset/validate_spec.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from fairspec_metadata import ( + Dataset, + IntegerColumnProperty, + Resource, + 
StringColumnProperty, + TableSchema, +) + +from .validate import validate_dataset + + +class TestValidateDataset: + def test_should_validate_valid_dataset(self): + dataset = Dataset( + resources=[ + Resource( + data=[{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}], + name="test", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + report = validate_dataset(dataset) + assert report.valid is True + + def test_should_detect_invalid_resource(self): + dataset = Dataset( + resources=[ + Resource( + data=[{"id": "BAD", "name": "english"}], + name="test", + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + ] + ) + report = validate_dataset(dataset) + assert report.valid is False + + def test_should_handle_empty_dataset(self): + dataset = Dataset(resources=[]) + report = validate_dataset(dataset) + assert report.valid is True diff --git a/library/fairspec_library/actions/file_dialect/__init__.py b/library/fairspec_library/actions/file_dialect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/file_dialect/infer.py b/library/fairspec_library/actions/file_dialect/infer.py new file mode 100644 index 0000000..855b3ed --- /dev/null +++ b/library/fairspec_library/actions/file_dialect/infer.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_dataset import DatasetPlugin, InferFileDialectOptions + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import FileDialect, Resource + + +def infer_file_dialect( + resource: Resource, **options: Unpack[InferFileDialectOptions] +) -> FileDialect | None: + for plugin in system.plugins: + if isinstance(plugin, DatasetPlugin): + result = plugin.infer_file_dialect(resource, **options) + if result is not None: + return result + + return 
None diff --git a/library/fairspec_library/actions/file_dialect/infer_spec.py b/library/fairspec_library/actions/file_dialect/infer_spec.py new file mode 100644 index 0000000..a4f15f1 --- /dev/null +++ b/library/fairspec_library/actions/file_dialect/infer_spec.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .infer import infer_file_dialect + + +class TestInferFileDialect: + def test_should_infer_csv_dialect(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource(data=path) + dialect = infer_file_dialect(resource) + assert dialect is not None + + def test_should_return_none_for_json_file(self): + path = write_temp_file('[{"id": 1}]', format="json") + resource = Resource(data=path) + dialect = infer_file_dialect(resource) + assert dialect is None + + def test_should_infer_xlsx_dialect(self): + resource = Resource(data="test.xlsx") + dialect = infer_file_dialect(resource) + assert dialect is not None + + def test_should_return_none_for_unknown_format(self): + resource = Resource(data="test.unknown") + dialect = infer_file_dialect(resource) + assert dialect is None + + def test_should_return_none_for_no_data(self): + resource = Resource() + dialect = infer_file_dialect(resource) + assert dialect is None diff --git a/library/fairspec_library/actions/resource/__init__.py b/library/fairspec_library/actions/resource/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/resource/infer.py b/library/fairspec_library/actions/resource/infer.py new file mode 100644 index 0000000..b532623 --- /dev/null +++ b/library/fairspec_library/actions/resource/infer.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from fairspec_metadata import ( + Resource, + infer_resource_name, + resolve_file_dialect, +) +from fairspec_dataset import infer_integrity, infer_textual + +from 
fairspec_library.actions.data_schema.infer import infer_data_schema +from fairspec_library.actions.file_dialect.infer import infer_file_dialect +from fairspec_library.actions.table_schema.infer import infer_table_schema + + +def infer_resource( + resource: Resource, *, resource_number: int | None = None +) -> Resource: + resource = resource.model_copy(deep=True) + + if not resource.name: + resource.name = infer_resource_name( + resource, resource_number=resource_number + ) + + if not resource.fileDialect: + resource.fileDialect = infer_file_dialect(resource) + + if resource.textual is None: + resolved_dialect = resolve_file_dialect(resource.fileDialect) + if resolved_dialect: + resource.textual = infer_textual(resource) + + if not resource.integrity: + resource.integrity = infer_integrity(resource) + + if not resource.dataSchema: + resource.dataSchema = infer_data_schema(resource) + + if not resource.tableSchema: + resource.tableSchema = infer_table_schema(resource) + + return resource diff --git a/library/fairspec_library/actions/resource/infer_spec.py b/library/fairspec_library/actions/resource/infer_spec.py new file mode 100644 index 0000000..2c7795e --- /dev/null +++ b/library/fairspec_library/actions/resource/infer_spec.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .infer import infer_resource + + +class TestInferResource: + def test_should_infer_name(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + result = infer_resource(resource) + assert result.name is not None + + def test_should_infer_file_dialect(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + result = infer_resource(resource) + assert result.fileDialect is not None + + def test_should_not_overwrite_existing_name(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = 
Resource(data=path, name="custom") + result = infer_resource(resource) + assert result.name == "custom" + + def test_should_not_mutate_original(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + result = infer_resource(resource) + assert result is not resource + + def test_should_handle_resource_number(self): + resource = Resource(data=[{"id": 1}]) + result = infer_resource(resource, resource_number=5) + assert result.name == "resource5" diff --git a/library/fairspec_library/actions/resource/validate.py b/library/fairspec_library/actions/resource/validate.py new file mode 100644 index 0000000..235df57 --- /dev/null +++ b/library/fairspec_library/actions/resource/validate.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_dataset import validate_file +from fairspec_metadata import Report, create_report + +from fairspec_library.actions.data.validate import validate_data +from fairspec_library.actions.table.validate import validate_table +from fairspec_library.models.table import ValidateTableOptions + +if TYPE_CHECKING: + from fairspec_metadata import FairspecError, Resource + + +def validate_resource( + resource: Resource, **options: Unpack[ValidateTableOptions] +) -> Report: + errors: list[FairspecError] = [] + + file_report = validate_file(resource) + errors.extend(file_report.errors) + if not file_report.valid: + return create_report(errors) + + data_report = validate_data(resource) + errors.extend(data_report.errors) + if not data_report.valid: + return create_report(errors) + + table_report = validate_table(resource, **options) + errors.extend(table_report.errors) + + return create_report(errors) diff --git a/library/fairspec_library/actions/resource/validate_spec.py b/library/fairspec_library/actions/resource/validate_spec.py new file mode 100644 index 0000000..e500ff5 --- /dev/null +++ 
b/library/fairspec_library/actions/resource/validate_spec.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import IntegerColumnProperty, Resource, StringColumnProperty, TableSchema + +from .validate import validate_resource + + +class TestValidateResource: + def test_should_validate_valid_resource(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource( + data=path, + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + report = validate_resource(resource) + assert report.valid is True + + def test_should_detect_type_error(self): + path = write_temp_file("id,name\nBAD,english", format="csv") + resource = Resource( + data=path, + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + report = validate_resource(resource) + assert report.valid is False + + def test_should_validate_resource_without_schema(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + report = validate_resource(resource) + assert report.valid is True diff --git a/library/fairspec_library/actions/table/__init__.py b/library/fairspec_library/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/table/infer.py b/library/fairspec_library/actions/table/infer.py new file mode 100644 index 0000000..ae1ba02 --- /dev/null +++ b/library/fairspec_library/actions/table/infer.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import Resource, resolve_table_schema +from fairspec_table import denormalize_table +from fairspec_table.models.table import LoadTableOptions + +from fairspec_library.actions.file_dialect.infer import infer_file_dialect +from 
fairspec_library.actions.table.load import load_table +from fairspec_library.actions.table_schema.infer import infer_table_schema + +if TYPE_CHECKING: + from fairspec_table import Table + + +def infer_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table | None: + resource = resource.model_copy(deep=True) + + if not resource.fileDialect: + resource.fileDialect = infer_file_dialect(resource) + + table = load_table(resource, denormalized=True, **options) + if table is None: + return None + + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + table_schema = infer_table_schema(resource, **options) + + if table_schema: + table = denormalize_table(table, table_schema) + + return table diff --git a/library/fairspec_library/actions/table/infer_spec.py b/library/fairspec_library/actions/table/infer_spec.py new file mode 100644 index 0000000..e263faa --- /dev/null +++ b/library/fairspec_library/actions/table/infer_spec.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .infer import infer_table + + +class TestInferTable: + def test_should_infer_and_load_table(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource(data=path) + table = infer_table(resource) + assert table is not None + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(frame) == 2 + + def test_should_return_none_for_empty_resource(self): + resource = Resource() + table = infer_table(resource) + assert table is None diff --git a/library/fairspec_library/actions/table/load.py b/library/fairspec_library/actions/table/load.py new file mode 100644 index 0000000..5a6bb66 --- /dev/null +++ b/library/fairspec_library/actions/table/load.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, 
Unpack + +from fairspec_table import TablePlugin +from fairspec_table.models.table import LoadTableOptions + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import Resource + from fairspec_table import Table + + +def load_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table | None: + for plugin in system.plugins: + if isinstance(plugin, TablePlugin): + result = plugin.load_table(resource, **options) + if result is not None: + return result + + return None diff --git a/library/fairspec_library/actions/table/load_spec.py b/library/fairspec_library/actions/table/load_spec.py new file mode 100644 index 0000000..0576c85 --- /dev/null +++ b/library/fairspec_library/actions/table/load_spec.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .load import load_table + + +class TestLoadTable: + def test_should_load_csv_table(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource(data=path) + table = load_table(resource) + assert table is not None + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_inline_table(self): + resource = Resource(data=[{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}]) + table = load_table(resource) + assert table is not None + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_return_none_for_empty_resource(self): + resource = Resource() + table = load_table(resource) + assert table is None + + def test_should_load_json_table(self): + path = 
write_temp_file('[{"id": 1, "name": "english"}]', format="json") + resource = Resource(data=path) + table = load_table(resource) + assert table is not None diff --git a/library/fairspec_library/actions/table/save.py b/library/fairspec_library/actions/table/save.py new file mode 100644 index 0000000..9107156 --- /dev/null +++ b/library/fairspec_library/actions/table/save.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from typing import Unpack + +from fairspec_table import TablePlugin +from fairspec_table.models.table import SaveTableOptions, Table + +from fairspec_library.system import system + + +def save_table(table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + for plugin in system.plugins: + if isinstance(plugin, TablePlugin): + result = plugin.save_table(table, **options) + if result is not None: + return result + + return None diff --git a/library/fairspec_library/actions/table/save_spec.py b/library/fairspec_library/actions/table/save_spec.py new file mode 100644 index 0000000..3df2407 --- /dev/null +++ b/library/fairspec_library/actions/table/save_spec.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path + +from .save import save_table + + +class TestSaveTable: + def test_should_save_table_to_csv(self): + path = get_temp_file_path(format="csv") + table = pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).lazy() + result = save_table(table, path=path) + assert result is not None + + def test_should_save_table_to_json(self): + path = get_temp_file_path(format="json") + table = pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).lazy() + result = save_table(table, path=path) + assert result is not None + + def test_should_return_none_for_unknown_format(self): + path = get_temp_file_path(format="unknown") + table = pl.DataFrame({"id": [1, 2]}).lazy() + result = save_table(table, path=path) + assert result is None diff --git 
a/library/fairspec_library/actions/table/validate.py b/library/fairspec_library/actions/table/validate.py new file mode 100644 index 0000000..bda3790 --- /dev/null +++ b/library/fairspec_library/actions/table/validate.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_metadata import Report, create_report, resolve_table_schema +from fairspec_table import inspect_table + +from fairspec_library.actions.file_dialect.infer import infer_file_dialect +from fairspec_library.actions.table.load import load_table +from fairspec_library.actions.table_schema.infer import infer_table_schema +from fairspec_library.models.table import ValidateTableOptions + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + +def validate_table( + resource: Resource, **options: Unpack[ValidateTableOptions] +) -> Report: + resource = resource.model_copy(deep=True) + + if not resource.fileDialect: + resource.fileDialect = infer_file_dialect(resource) + + no_infer = options.get("noInfer", False) + max_errors = options.get("maxErrors", 1000) + + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema and not no_infer: + table_schema = infer_table_schema(resource, **options) + + table = load_table(resource, denormalized=True, **options) + if table is None: + return create_report() + + errors = inspect_table(table, table_schema=table_schema, max_errors=max_errors) + return create_report(list(errors)) diff --git a/library/fairspec_library/actions/table/validate_spec.py b/library/fairspec_library/actions/table/validate_spec.py new file mode 100644 index 0000000..eec4a08 --- /dev/null +++ b/library/fairspec_library/actions/table/validate_spec.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import IntegerColumnProperty, Resource, StringColumnProperty, TableSchema + +from .validate import validate_table + + +class 
TestValidateTable: + def test_should_validate_valid_table(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource( + data=path, + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + report = validate_table(resource) + assert report.valid is True + + def test_should_detect_type_error(self): + path = write_temp_file("id,name\nBAD,english", format="csv") + resource = Resource( + data=path, + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ), + ) + report = validate_table(resource) + assert report.valid is False + assert len(report.errors) > 0 + + def test_should_validate_without_schema(self): + path = write_temp_file("id,name\n1,english", format="csv") + resource = Resource(data=path) + report = validate_table(resource) + assert report.valid is True diff --git a/library/fairspec_library/actions/table_schema/__init__.py b/library/fairspec_library/actions/table_schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/actions/table_schema/infer.py b/library/fairspec_library/actions/table_schema/infer.py new file mode 100644 index 0000000..d7ebde1 --- /dev/null +++ b/library/fairspec_library/actions/table_schema/infer.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_table import TablePlugin, infer_table_schema_from_table +from fairspec_table.models.schema import InferTableSchemaOptions + +from fairspec_library.actions.table.load import load_table +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import Resource, TableSchema + + +def infer_table_schema( + resource: Resource, **options: Unpack[InferTableSchemaOptions] +) -> TableSchema | None: + for plugin in system.plugins: + if isinstance(plugin, TablePlugin): + result = 
plugin.infer_table_schema(resource, **options) + if result is not None: + return result + + table = load_table(resource, denormalized=True) + if table is None: + return None + + return infer_table_schema_from_table(table, **options) diff --git a/library/fairspec_library/actions/table_schema/infer_spec.py b/library/fairspec_library/actions/table_schema/infer_spec.py new file mode 100644 index 0000000..4541ddf --- /dev/null +++ b/library/fairspec_library/actions/table_schema/infer_spec.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import Resource + +from .infer import infer_table_schema + + +class TestInferTableSchema: + def test_should_infer_schema_from_csv(self): + path = write_temp_file("id,name\n1,english\n2,中文", format="csv") + resource = Resource(data=path) + schema = infer_table_schema(resource) + assert schema is not None + assert schema.properties is not None + + def test_should_return_none_for_empty_resource(self): + resource = Resource() + schema = infer_table_schema(resource) + assert schema is None + + def test_should_infer_schema_from_inline_data(self): + resource = Resource(data=[{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}]) + schema = infer_table_schema(resource) + assert schema is not None diff --git a/library/fairspec_library/actions/table_schema/render.py b/library/fairspec_library/actions/table_schema/render.py new file mode 100644 index 0000000..f9d79b2 --- /dev/null +++ b/library/fairspec_library/actions/table_schema/render.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_library.system import system + +if TYPE_CHECKING: + from fairspec_metadata import RenderTableSchemaOptions, TableSchema + + +def render_table_schema_as( + table_schema: TableSchema, options: RenderTableSchemaOptions +) -> str | None: + for plugin in system.plugins: + result = plugin.render_table_schema_as(table_schema, options) 
+ if result is not None: + return result + + return None diff --git a/library/fairspec_library/models/__init__.py b/library/fairspec_library/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/models/table.py b/library/fairspec_library/models/table.py new file mode 100644 index 0000000..e1dadd2 --- /dev/null +++ b/library/fairspec_library/models/table.py @@ -0,0 +1,6 @@ +from fairspec_table.models.table import LoadTableOptions + + +class ValidateTableOptions(LoadTableOptions, total=False): + noInfer: bool + maxErrors: int diff --git a/library/fairspec_library/plugin.py b/library/fairspec_library/plugin.py new file mode 100644 index 0000000..43a2cb0 --- /dev/null +++ b/library/fairspec_library/plugin.py @@ -0,0 +1,3 @@ +from fairspec_table import TablePlugin + +Plugin = TablePlugin diff --git a/library/fairspec_library/py.typed b/library/fairspec_library/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/library/fairspec_library/system.py b/library/fairspec_library/system.py new file mode 100644 index 0000000..4e6126f --- /dev/null +++ b/library/fairspec_library/system.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from fairspec_dataset import ( + CkanPlugin, + DescriptorPlugin, + FolderPlugin, + GithubPlugin, + ZenodoPlugin, + ZipPlugin, +) +from fairspec_metadata import MetadataPlugin +from fairspec_table import ( + ArrowPlugin, + CsvPlugin, + InlinePlugin, + JsonPlugin, + ParquetPlugin, + SqlitePlugin, + XlsxPlugin, +) + + +class System: + plugins: list[MetadataPlugin] + + def __init__(self) -> None: + self.plugins = [] + + def register(self, plugin_class: type[MetadataPlugin]) -> None: + self.plugins.insert(0, plugin_class()) + + +system = System() + +system.register(CkanPlugin) +system.register(DescriptorPlugin) +system.register(GithubPlugin) +system.register(ZenodoPlugin) +system.register(FolderPlugin) +system.register(ZipPlugin) + +system.register(ArrowPlugin) 
+system.register(CsvPlugin) +system.register(InlinePlugin) +system.register(JsonPlugin) +system.register(ParquetPlugin) +system.register(SqlitePlugin) +system.register(XlsxPlugin) diff --git a/library/pyproject.toml b/library/pyproject.toml new file mode 100644 index 0000000..30126d0 --- /dev/null +++ b/library/pyproject.toml @@ -0,0 +1,43 @@ +[project] +name = "fairspec-library" +version = "0.0.0-dev" +description = "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +keywords = [ + "data", + "dataframe", + "fair", + "fairspec", + "jsonschema", + "library", + "metadata", + "polars", + "python", + "quality", + "tableschema", + "validation", +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "fairspec-dataset", + "fairspec-metadata", + "fairspec-table", + "genson>=1.3", +] + +[[project.authors]] +name = "Evgeny Karev" + +[project.urls] +homepage = "https://github.com/fairspec/fairspec-python" +repository = "https://github.com/fairspec/fairspec-python" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/metadata/README.md b/metadata/README.md index bc373fa..ae7b490 100644 --- a/metadata/README.md +++ b/metadata/README.md @@ -1,3 +1,3 @@ # fairspec-metadata -Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://typescript.fairspec.org). +Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. 
It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). diff --git a/metadata/fairspec_metadata/__init__.py b/metadata/fairspec_metadata/__init__.py index e69de29..5a05558 100644 --- a/metadata/fairspec_metadata/__init__.py +++ b/metadata/fairspec_metadata/__init__.py @@ -0,0 +1,504 @@ +from .actions.column.create import create_column_from_property +from .actions.column.property import ( + get_base_property_type, + get_column_properties, + get_is_nullable_property_type, +) +from .actions.data_schema.assert_ import assert_data_schema +from .actions.data_schema.load import load_data_schema +from .actions.data_schema.resolve import resolve_data_schema +from .actions.data_schema.save import save_data_schema +from .actions.data_schema.validate import ( + DataSchemaValidationResult, + validate_data_schema, +) +from .actions.dataset.assert_ import assert_dataset +from .actions.dataset.denormalize import denormalize_dataset +from .actions.dataset.load import load_dataset_descriptor +from .actions.dataset.normalize import normalize_dataset +from .actions.dataset.save import save_dataset_descriptor +from .actions.dataset.validate import ( + DatasetValidationResult, + validate_dataset_descriptor, +) +from .actions.descriptor.copy import copy_descriptor +from .actions.descriptor.general import get_is_descriptor +from .actions.descriptor.load import load_descriptor +from .actions.descriptor.parse import parse_descriptor +from .actions.descriptor.save import save_descriptor +from .actions.descriptor.stringify import stringify_descriptor +from .actions.descriptor.validate import validate_descriptor +from .actions.file_dialect.assert_ import assert_file_dialect +from .actions.file_dialect.infer import infer_file_dialect_format +from .actions.file_dialect.load import load_file_dialect +from .actions.file_dialect.resolve 
import resolve_file_dialect +from .actions.file_dialect.save import save_file_dialect +from .actions.file_dialect.support import get_supported_file_dialect +from .actions.file_dialect.validate import ( + FileDialectValidationResult, + validate_file_dialect, +) +from .actions.json.inspect import inspect_json +from .actions.json_schema.assert_ import assert_json_schema +from .actions.json_schema.inspect import inspect_json_schema +from .actions.json_schema.load import load_json_schema +from .actions.json_schema.resolve import resolve_json_schema +from .actions.json_schema.save import save_json_schema +from .actions.path.basepath import get_basepath, resolve_basepath +from .actions.path.denormalize import denormalize_path +from .actions.path.general import ( + get_file_basename, + get_file_extension, + get_file_name, + get_file_name_slug, + get_file_protocol, + get_is_remote_path, +) +from .actions.path.normalize import normalize_path +from .actions.profile.assert_ import assert_profile +from .actions.profile.load import load_profile +from .actions.profile.registry import profile_registry +from .actions.report.create import create_report +from .actions.resource.data import ( + get_data_first_path, + get_data_path, + get_data_paths, + get_data_records, + get_data_value, +) +from .actions.resource.denormalize import denormalize_resource +from .actions.resource.general import get_is_remote_resource +from .actions.resource.infer import infer_resource_name +from .actions.resource.normalize import normalize_resource +from .actions.table_schema.assert_ import assert_table_schema +from .actions.table_schema.column import get_columns +from .actions.table_schema.load import load_table_schema +from .actions.table_schema.resolve import resolve_table_schema +from .actions.table_schema.save import save_table_schema +from .actions.table_schema.validate import ( + TableSchemaValidationResult, + validate_table_schema, +) +from .models.base import FairspecModel +from .models.catalog 
import Catalog, CatalogDataset +from .models.datacite.alternate_identifier import ( + AlternateIdentifier, + AlternateIdentifiers, +) +from .models.datacite.common import ( + ContributorType, + ContentTypeGeneral, + CreatorNameType, + DateType, + DescriptionType, + FunderIdentifierType, + Latitude, + Longitude, + NumberType, + RelatedIdentifierType, + RelationType, + TitleType, +) +from .models.datacite.content_type import ContentTypes +from .models.datacite.contributor import Contributor, Contributors +from .models.datacite.creator import ( + Creator, + CreatorAffiliation, + CreatorNameIdentifier, + Creators, +) +from .models.datacite.date import DataciteDate, DateValue, Dates +from .models.datacite.description import DataciteDescription, Descriptions +from .models.datacite.formats import Formats +from .models.datacite.funding_reference import FundingReference, FundingReferences +from .models.datacite.geo_location import ( + GeoLocation, + GeoLocationBox, + GeoLocationPoint, + GeoLocationPolygonItem, + GeoLocations, +) +from .models.datacite.identifier import Doi, DoiPrefix, DoiSuffix +from .models.datacite.language import Language +from .models.datacite.publication_year import PublicationYear +from .models.datacite.publisher import Publisher +from .models.datacite.related_identifier import ( + RelatedIdentifier, + RelatedIdentifiers, + RelatedObject, +) +from .models.datacite.related_item import ( + RelatedItem, + RelatedItemIdentifier, + RelatedItems, +) +from .models.datacite.rights import Rights, RightsList +from .models.datacite.size import Sizes +from .models.datacite.subject import Subject, Subjects +from .models.datacite.title import Title, Titles +from .models.datacite.version import Version +from .models.column.array import ArrayColumn, ArrayColumnProperty +from .models.column.base import BaseColumn, BaseColumnProperty, BasePropertyType +from .models.column.base64 import Base64Column, Base64ColumnProperty +from .models.column.boolean import 
BooleanColumn, BooleanColumnProperty +from .models.column.categorical import ( + CategoricalColumn, + IntegerCategoricalColumnProperty, + IntegerCategoryItem, + StringCategoricalColumnProperty, + StringCategoryItem, +) +from .models.column.column import Column, ColumnProperty, ColumnType +from .models.column.date import DateColumn, DateColumnProperty +from .models.column.date_time import DateTimeColumn, DateTimeColumnProperty +from .models.column.decimal import DecimalColumn, DecimalColumnProperty +from .models.column.duration import DurationColumn, DurationColumnProperty +from .models.column.email import EmailColumn, EmailColumnProperty +from .models.column.geojson import GeojsonColumn, GeojsonColumnProperty +from .models.column.hex import HexColumn, HexColumnProperty +from .models.column.integer import IntegerColumn, IntegerColumnProperty +from .models.column.list import ListColumn, ListColumnProperty +from .models.column.number import NumberColumn, NumberColumnProperty +from .models.column.object import ObjectColumn, ObjectColumnProperty +from .models.column.string import StringColumn, StringColumnProperty +from .models.column.time import TimeColumn, TimeColumnProperty +from .models.column.topojson import TopojsonColumn, TopojsonColumnProperty +from .models.column.unknown import UnknownColumn, UnknownColumnProperty +from .models.column.url import UrlColumn, UrlColumnProperty +from .models.column.wkb import WkbColumn, WkbColumnProperty +from .models.column.wkt import WktColumn, WktColumnProperty +from .models.data import Data, ResourceData, ResourceDataPath, ResourceDataValue +from .models.data_schema import DataSchema, RenderDataSchemaOptions +from .models.datacite.datacite import Datacite +from .models.dataset import ( + ConvertDatasetFromOptions, + ConvertDatasetToOptions, + Dataset, + RenderDatasetOptions, +) +from .models.descriptor import Descriptor +from .models.error.base import BaseError +from .models.error.cell import ( + CellConstError, + 
CellEnumError, + CellError, + CellExclusiveMaximumError, + CellExclusiveMinimumError, + CellJsonError, + CellMaximumError, + CellMaxItemsError, + CellMaxLengthError, + CellMinimumError, + CellMinItemsError, + CellMinLengthError, + CellMissingError, + CellMultipleOfError, + CellPatternError, + CellTypeError, + CellUniqueError, +) +from .models.error.column import ColumnError, ColumnMissingError, ColumnTypeError +from .models.error.data import DataError +from .models.error.error import FairspecError +from .models.error.file import FileError, IntegrityError, TextualError +from .models.error.foreign_key import ForeignKeyError +from .models.error.metadata import MetadataError +from .models.error.resource import ( + ResourceError, + ResourceMissingError, + ResourceTypeError, +) +from .models.error.row import RowError, RowPrimaryKeyError, RowUniqueKeyError +from .models.error.table import TableError +from .models.exception import FairspecException +from .models.file_dialect.common import RowType +from .models.file_dialect.arrow import ArrowFileDialect +from .models.file_dialect.csv import CsvFileDialect +from .models.file_dialect.file_dialect import FileDialect +from .models.file_dialect.json import JsonFileDialect +from .models.file_dialect.jsonl import JsonlFileDialect +from .models.file_dialect.ods import OdsFileDialect +from .models.file_dialect.parquet import ParquetFileDialect +from .models.file_dialect.sqlite import SqliteFileDialect +from .models.file_dialect.tsv import TsvFileDialect +from .models.file_dialect.unknown import UnknownFileDialect +from .models.file_dialect.xlsx import XlsxFileDialect +from .models.foreign_key import ForeignKey, ForeignKeyReference +from .models.integrity import Integrity, IntegrityType +from .models.json_schema import JsonSchema +from .models.path import ExternalPath, InternalPath, Path +from .models.profile import Profile, ProfileRegistry, ProfileType +from .models.report import Report +from .models.resource import Resource +from 
.models.table_schema import ( + ConvertTableSchemaFromOptions, + ConvertTableSchemaToOptions, + RenderTableSchemaOptions, + TableSchema, + TableSchemaMissingValueItem, +) +from .models.unique_key import UniqueKey +from .plugin import MetadataPlugin +from .settings import FAIRSPEC_VERSION + +__all__ = [ + "ArrowFileDialect", + "assert_data_schema", + "assert_dataset", + "assert_file_dialect", + "assert_json_schema", + "assert_profile", + "assert_table_schema", + "copy_descriptor", + "create_column_from_property", + "create_report", + "DataSchemaValidationResult", + "DatasetValidationResult", + "denormalize_dataset", + "denormalize_path", + "denormalize_resource", + "FileDialectValidationResult", + "get_base_property_type", + "get_basepath", + "get_column_properties", + "get_columns", + "get_data_first_path", + "get_data_path", + "get_data_paths", + "get_data_records", + "get_data_value", + "get_file_basename", + "get_file_extension", + "get_file_name", + "get_file_name_slug", + "get_file_protocol", + "get_is_descriptor", + "get_is_nullable_property_type", + "get_is_remote_path", + "get_is_remote_resource", + "get_supported_file_dialect", + "infer_file_dialect_format", + "infer_resource_name", + "inspect_json", + "inspect_json_schema", + "load_data_schema", + "load_dataset_descriptor", + "load_descriptor", + "load_file_dialect", + "load_json_schema", + "load_profile", + "load_table_schema", + "normalize_dataset", + "normalize_path", + "normalize_resource", + "parse_descriptor", + "profile_registry", + "resolve_basepath", + "resolve_data_schema", + "resolve_file_dialect", + "resolve_json_schema", + "resolve_table_schema", + "save_data_schema", + "save_dataset_descriptor", + "save_descriptor", + "save_file_dialect", + "save_json_schema", + "save_table_schema", + "stringify_descriptor", + "TableSchemaValidationResult", + "validate_data_schema", + "validate_dataset_descriptor", + "validate_descriptor", + "validate_file_dialect", + "validate_table_schema", + 
"AlternateIdentifier", + "AlternateIdentifiers", + "ArrayColumn", + "ArrayColumnProperty", + "Base64Column", + "Base64ColumnProperty", + "BaseColumn", + "BaseColumnProperty", + "BaseError", + "BasePropertyType", + "BooleanColumn", + "BooleanColumnProperty", + "Catalog", + "CatalogDataset", + "CategoricalColumn", + "CellConstError", + "CellEnumError", + "CellError", + "CellExclusiveMaximumError", + "CellExclusiveMinimumError", + "CellJsonError", + "CellMaxItemsError", + "CellMaxLengthError", + "CellMaximumError", + "CellMinItemsError", + "CellMinLengthError", + "CellMinimumError", + "CellMissingError", + "CellMultipleOfError", + "CellPatternError", + "CellTypeError", + "CellUniqueError", + "Column", + "ColumnError", + "ContentTypeGeneral", + "ContentTypes", + "Contributor", + "Contributors", + "ContributorType", + "ConvertDatasetFromOptions", + "ConvertDatasetToOptions", + "ConvertTableSchemaFromOptions", + "ConvertTableSchemaToOptions", + "ColumnMissingError", + "ColumnProperty", + "ColumnType", + "ColumnTypeError", + "Creator", + "CreatorAffiliation", + "CreatorNameIdentifier", + "CreatorNameType", + "Creators", + "CsvFileDialect", + "Data", + "DataError", + "DataSchema", + "Datacite", + "DataciteDate", + "DataciteDescription", + "Dataset", + "DateColumn", + "DateColumnProperty", + "DateTimeColumn", + "DateTimeColumnProperty", + "Dates", + "DateType", + "DateValue", + "DecimalColumn", + "DecimalColumnProperty", + "Descriptions", + "DescriptionType", + "Descriptor", + "Doi", + "DoiPrefix", + "DoiSuffix", + "DurationColumn", + "DurationColumnProperty", + "EmailColumn", + "EmailColumnProperty", + "ExternalPath", + "FAIRSPEC_VERSION", + "FairspecError", + "FairspecModel", + "FairspecException", + "FileDialect", + "FileError", + "ForeignKey", + "ForeignKeyError", + "ForeignKeyReference", + "Formats", + "FunderIdentifierType", + "FundingReference", + "FundingReferences", + "GeoLocation", + "GeoLocationBox", + "GeoLocationPoint", + "GeoLocationPolygonItem", + 
"GeoLocations", + "GeojsonColumn", + "GeojsonColumnProperty", + "HexColumn", + "HexColumnProperty", + "Integrity", + "IntegerCategoricalColumnProperty", + "IntegerCategoryItem", + "IntegerColumn", + "IntegerColumnProperty", + "IntegrityError", + "IntegrityType", + "InternalPath", + "JsonFileDialect", + "JsonSchema", + "JsonlFileDialect", + "Language", + "Latitude", + "ListColumn", + "ListColumnProperty", + "Longitude", + "MetadataError", + "MetadataPlugin", + "NumberColumn", + "NumberColumnProperty", + "NumberType", + "ObjectColumn", + "ObjectColumnProperty", + "OdsFileDialect", + "ParquetFileDialect", + "Path", + "Profile", + "ProfileRegistry", + "ProfileType", + "PublicationYear", + "Publisher", + "RelatedIdentifier", + "RelatedIdentifierType", + "RelatedIdentifiers", + "RelatedItem", + "RelatedItemIdentifier", + "RelatedItems", + "RelatedObject", + "RelationType", + "RenderDataSchemaOptions", + "RenderDatasetOptions", + "RenderTableSchemaOptions", + "Report", + "Resource", + "ResourceData", + "ResourceDataPath", + "ResourceDataValue", + "ResourceError", + "ResourceMissingError", + "ResourceTypeError", + "Rights", + "RightsList", + "RowError", + "RowPrimaryKeyError", + "RowType", + "RowUniqueKeyError", + "Sizes", + "SqliteFileDialect", + "StringCategoricalColumnProperty", + "StringCategoryItem", + "StringColumn", + "StringColumnProperty", + "Subject", + "Subjects", + "TableError", + "TableSchema", + "TableSchemaMissingValueItem", + "TextualError", + "TimeColumn", + "TimeColumnProperty", + "Title", + "Titles", + "TitleType", + "TopojsonColumn", + "TopojsonColumnProperty", + "TsvFileDialect", + "UniqueKey", + "UnknownColumn", + "UnknownColumnProperty", + "UnknownFileDialect", + "UrlColumn", + "UrlColumnProperty", + "Version", + "WkbColumn", + "WkbColumnProperty", + "WktColumn", + "WktColumnProperty", + "XlsxFileDialect", +] diff --git a/metadata/fairspec_metadata/actions/column/create.py b/metadata/fairspec_metadata/actions/column/create.py new file mode 100644 
index 0000000..d20c6d6 --- /dev/null +++ b/metadata/fairspec_metadata/actions/column/create.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +from fairspec_metadata.models.column.array import ArrayColumn, ArrayColumnProperty +from fairspec_metadata.models.column.base import BaseColumn, BaseColumnProperty +from fairspec_metadata.models.column.base64 import Base64Column, Base64ColumnProperty +from fairspec_metadata.models.column.boolean import ( + BooleanColumn, + BooleanColumnProperty, +) +from fairspec_metadata.models.column.categorical import ( + CategoricalColumn, + IntegerCategoricalColumnProperty, + StringCategoricalColumnProperty, +) +from fairspec_metadata.models.column.column import ColumnType +from fairspec_metadata.models.column.date import DateColumn, DateColumnProperty +from fairspec_metadata.models.column.date_time import ( + DateTimeColumn, + DateTimeColumnProperty, +) +from fairspec_metadata.models.column.decimal import DecimalColumn, DecimalColumnProperty +from fairspec_metadata.models.column.duration import ( + DurationColumn, + DurationColumnProperty, +) +from fairspec_metadata.models.column.email import EmailColumn, EmailColumnProperty +from fairspec_metadata.models.column.geojson import GeojsonColumn, GeojsonColumnProperty +from fairspec_metadata.models.column.hex import HexColumn, HexColumnProperty +from fairspec_metadata.models.column.integer import IntegerColumn, IntegerColumnProperty +from fairspec_metadata.models.column.list import ListColumn, ListColumnProperty +from fairspec_metadata.models.column.number import NumberColumn, NumberColumnProperty +from fairspec_metadata.models.column.object import ObjectColumn, ObjectColumnProperty +from fairspec_metadata.models.column.string import StringColumn, StringColumnProperty +from fairspec_metadata.models.column.time import TimeColumn, TimeColumnProperty +from fairspec_metadata.models.column.topojson import ( + TopojsonColumn, + 
TopojsonColumnProperty, +) +from fairspec_metadata.models.column.unknown import UnknownColumn, UnknownColumnProperty +from fairspec_metadata.models.column.url import UrlColumn, UrlColumnProperty +from fairspec_metadata.models.column.wkb import WkbColumn, WkbColumnProperty +from fairspec_metadata.models.column.wkt import WktColumn, WktColumnProperty + +from .property import get_base_property_type, get_is_nullable_property_type + +if TYPE_CHECKING: + from fairspec_metadata.models.column.column import Column + from fairspec_metadata.models.descriptor import Descriptor + +_COLUMN_CLASS_MAP: dict[ + ColumnType, tuple[type[BaseColumn], type[BaseColumnProperty]] +] = { + ColumnType.array: (ArrayColumn, ArrayColumnProperty), + ColumnType.base64: (Base64Column, Base64ColumnProperty), + ColumnType.boolean: (BooleanColumn, BooleanColumnProperty), + ColumnType.date: (DateColumn, DateColumnProperty), + ColumnType.date_time: (DateTimeColumn, DateTimeColumnProperty), + ColumnType.decimal: (DecimalColumn, DecimalColumnProperty), + ColumnType.duration: (DurationColumn, DurationColumnProperty), + ColumnType.email: (EmailColumn, EmailColumnProperty), + ColumnType.geojson: (GeojsonColumn, GeojsonColumnProperty), + ColumnType.hex: (HexColumn, HexColumnProperty), + ColumnType.integer: (IntegerColumn, IntegerColumnProperty), + ColumnType.list: (ListColumn, ListColumnProperty), + ColumnType.number: (NumberColumn, NumberColumnProperty), + ColumnType.object: (ObjectColumn, ObjectColumnProperty), + ColumnType.string: (StringColumn, StringColumnProperty), + ColumnType.time: (TimeColumn, TimeColumnProperty), + ColumnType.topojson: (TopojsonColumn, TopojsonColumnProperty), + ColumnType.unknown: (UnknownColumn, UnknownColumnProperty), + ColumnType.url: (UrlColumn, UrlColumnProperty), + ColumnType.wkb: (WkbColumn, WkbColumnProperty), + ColumnType.wkt: (WktColumn, WktColumnProperty), +} + +_CATEGORICAL_PROPERTY_MAP: dict[str | None, type[BaseColumnProperty]] = { + "integer": 
IntegerCategoricalColumnProperty, + "string": StringCategoricalColumnProperty, +} + + +def create_column_from_property(name: str, property: Descriptor) -> Column: + base_type = get_base_property_type(property.get("type")) + format = property.get("format") + nullable = get_is_nullable_property_type(property.get("type")) or None + column_type = _get_column_type(base_type, format) + + if column_type == ColumnType.categorical: + cat_property_cls = _CATEGORICAL_PROPERTY_MAP.get( + base_type, StringCategoricalColumnProperty + ) + property_model = cat_property_cls.model_validate(property) + # Upcast concrete subclass to Column union (type checker can't infer this) + return cast( + "Column", + CategoricalColumn.model_validate( + { + "type": column_type, + "name": name, + "nullable": nullable, + "property": property_model, + } + ), + ) + + column_cls, property_cls = _COLUMN_CLASS_MAP[column_type] + property_model = property_cls.model_validate(property) + # Upcast concrete subclass to Column union (type checker can't infer this) + return cast( + "Column", + column_cls.model_validate( + { + "type": column_type, + "name": name, + "nullable": nullable, + "property": property_model, + } + ), + ) + + +def _get_column_type(base_type: str | None, format: str | None) -> ColumnType: + match base_type: + case "boolean": + return ColumnType.boolean + case "integer": + if format == "categorical": + return ColumnType.categorical + return ColumnType.integer + case "number": + return ColumnType.number + case "string": + match format: + case "categorical": + return ColumnType.categorical + case "decimal": + return ColumnType.decimal + case "list": + return ColumnType.list + case "base64": + return ColumnType.base64 + case "hex": + return ColumnType.hex + case "email": + return ColumnType.email + case "url": + return ColumnType.url + case "date-time": + return ColumnType.date_time + case "date": + return ColumnType.date + case "time": + return ColumnType.time + case "duration": + return 
ColumnType.duration + case "wkt": + return ColumnType.wkt + case "wkb": + return ColumnType.wkb + case _: + return ColumnType.string + case "array": + return ColumnType.array + case "object": + match format: + case "geojson": + return ColumnType.geojson + case "topojson": + return ColumnType.topojson + case _: + return ColumnType.object + case _: + return ColumnType.unknown diff --git a/metadata/fairspec_metadata/actions/column/create_spec.py b/metadata/fairspec_metadata/actions/column/create_spec.py new file mode 100644 index 0000000..b46ad2a --- /dev/null +++ b/metadata/fairspec_metadata/actions/column/create_spec.py @@ -0,0 +1,83 @@ +from .create import create_column_from_property + + +class TestCreateColumnFromProperty: + def test_creates_string_column(self): + column = create_column_from_property("name", {"type": "string"}) + assert column.type == "string" + assert column.nullable is None + + def test_creates_nullable_column_for_type_null(self): + column = create_column_from_property("name", {"type": ["string", "null"]}) + assert column.type == "string" + assert column.nullable is True + + def test_creates_nullable_column_for_null_type(self): + column = create_column_from_property("name", {"type": ["null", "string"]}) + assert column.type == "string" + assert column.nullable is True + + def test_creates_nullable_date_column(self): + column = create_column_from_property( + "created", {"type": ["string", "null"], "format": "date"} + ) + assert column.type == "date" + assert column.nullable is True + + def test_creates_integer_column(self): + column = create_column_from_property("id", {"type": "integer"}) + assert column.type == "integer" + + def test_creates_boolean_column(self): + column = create_column_from_property("flag", {"type": "boolean"}) + assert column.type == "boolean" + + def test_creates_number_column(self): + column = create_column_from_property("value", {"type": "number"}) + assert column.type == "number" + + def test_creates_array_column(self): + 
column = create_column_from_property("items", {"type": "array"}) + assert column.type == "array" + + def test_creates_object_column(self): + column = create_column_from_property("meta", {"type": "object"}) + assert column.type == "object" + + def test_creates_geojson_column(self): + column = create_column_from_property( + "geo", {"type": "object", "format": "geojson"} + ) + assert column.type == "geojson" + + def test_creates_topojson_column(self): + column = create_column_from_property( + "topo", {"type": "object", "format": "topojson"} + ) + assert column.type == "topojson" + + def test_creates_categorical_column_from_string(self): + column = create_column_from_property( + "cat", {"type": "string", "format": "categorical"} + ) + assert column.type == "categorical" + + def test_creates_categorical_column_from_integer(self): + column = create_column_from_property( + "cat", {"type": "integer", "format": "categorical"} + ) + assert column.type == "categorical" + + def test_creates_unknown_column_for_none_type(self): + column = create_column_from_property("x", {}) + assert column.type == "unknown" + + def test_creates_email_column(self): + column = create_column_from_property( + "email", {"type": "string", "format": "email"} + ) + assert column.type == "email" + + def test_creates_url_column(self): + column = create_column_from_property("url", {"type": "string", "format": "url"}) + assert column.type == "url" diff --git a/metadata/fairspec_metadata/actions/column/property.py b/metadata/fairspec_metadata/actions/column/property.py new file mode 100644 index 0000000..8f5d8dc --- /dev/null +++ b/metadata/fairspec_metadata/actions/column/property.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from collections.abc import Sequence + +from fairspec_metadata.models.column.column import Column, ColumnProperty + + +def get_base_property_type(type: str | Sequence[str] | None) -> str | None: + if type is None: + return None + if isinstance(type, str): + return type + 
for t in type: + if t != "null": + return t + return "null" + + +def get_is_nullable_property_type(type: str | Sequence[str] | None) -> bool: + if type is None: + return False + if isinstance(type, str): + return False + return "null" in type + + +def get_column_properties(columns: list[Column]) -> dict[str, ColumnProperty]: + return {column.name: column.property for column in columns} diff --git a/metadata/fairspec_metadata/actions/column/property_spec.py b/metadata/fairspec_metadata/actions/column/property_spec.py new file mode 100644 index 0000000..4e15eef --- /dev/null +++ b/metadata/fairspec_metadata/actions/column/property_spec.py @@ -0,0 +1,29 @@ +from .property import get_base_property_type, get_is_nullable_property_type + + +class TestGetBasePropertyType: + def test_returns_type_for_string(self): + assert get_base_property_type("string") == "string" + + def test_returns_base_type_for_type_null(self): + assert get_base_property_type(["string", "null"]) == "string" + + def test_returns_base_type_for_null_type(self): + assert get_base_property_type(["null", "string"]) == "string" + + def test_returns_none_for_none(self): + assert get_base_property_type(None) is None + + +class TestGetIsNullablePropertyType: + def test_returns_false_for_string(self): + assert get_is_nullable_property_type("string") is False + + def test_returns_true_for_type_null(self): + assert get_is_nullable_property_type(["string", "null"]) is True + + def test_returns_true_for_null_type(self): + assert get_is_nullable_property_type(["null", "string"]) is True + + def test_returns_false_for_none(self): + assert get_is_nullable_property_type(None) is False diff --git a/metadata/fairspec_metadata/actions/data_schema/assert_.py b/metadata/fairspec_metadata/actions/data_schema/assert_.py new file mode 100644 index 0000000..8a12f8e --- /dev/null +++ b/metadata/fairspec_metadata/actions/data_schema/assert_.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from 
fairspec_metadata.models.data_schema import DataSchema
+from fairspec_metadata.models.descriptor import Descriptor
+from fairspec_metadata.models.exception import FairspecException
+
+from .validate import validate_data_schema
+
+
+def assert_data_schema(source: Descriptor) -> DataSchema:
+    result = validate_data_schema(source)
+
+    if not result.data_schema:
+        raise FairspecException("Invalid Data Schema", report=result)
+
+    return result.data_schema
diff --git a/metadata/fairspec_metadata/actions/data_schema/load.py b/metadata/fairspec_metadata/actions/data_schema/load.py
new file mode 100644
index 0000000..c150898
--- /dev/null
+++ b/metadata/fairspec_metadata/actions/data_schema/load.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from fairspec_metadata.actions.descriptor.load import load_descriptor
+from fairspec_metadata.models.data_schema import DataSchema
+
+from .assert_ import assert_data_schema
+
+
+def load_data_schema(path: str) -> DataSchema:
+    descriptor = load_descriptor(path)
+    return assert_data_schema(descriptor)
diff --git a/metadata/fairspec_metadata/actions/data_schema/resolve.py b/metadata/fairspec_metadata/actions/data_schema/resolve.py
new file mode 100644
index 0000000..d2cf8aa
--- /dev/null
+++ b/metadata/fairspec_metadata/actions/data_schema/resolve.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from fairspec_metadata.models.data_schema import DataSchema
+
+from .load import load_data_schema
+
+
+def resolve_data_schema(
+    data_schema: DataSchema | str | None = None,
+) -> DataSchema | None:
+    if data_schema is None:
+        return None
+
+    if not isinstance(data_schema, str):
+        return data_schema
+
+    return load_data_schema(data_schema)
diff --git a/metadata/fairspec_metadata/actions/data_schema/save.py b/metadata/fairspec_metadata/actions/data_schema/save.py
new file mode 100644
index 0000000..5e0ec4e
--- /dev/null
+++ b/metadata/fairspec_metadata/actions/data_schema/save.py
@@ -0,0 
+1,22 @@ +from __future__ import annotations + +from fairspec_metadata.actions.descriptor.copy import copy_descriptor +from fairspec_metadata.actions.descriptor.save import save_descriptor +from fairspec_metadata.models.data_schema import DataSchema +from fairspec_metadata.settings import FAIRSPEC_VERSION + + +def save_data_schema( + data_schema: DataSchema, + *, + path: str, + overwrite: bool = False, +) -> None: + descriptor = copy_descriptor(data_schema) + + if "$schema" not in descriptor: + descriptor["$schema"] = ( + f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/data-schema.json" + ) + + save_descriptor(descriptor, path=path, overwrite=overwrite) diff --git a/metadata/fairspec_metadata/actions/data_schema/validate.py b/metadata/fairspec_metadata/actions/data_schema/validate.py new file mode 100644 index 0000000..5363306 --- /dev/null +++ b/metadata/fairspec_metadata/actions/data_schema/validate.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from fairspec_metadata.actions.descriptor.load import load_descriptor +from fairspec_metadata.actions.descriptor.validate import validate_descriptor +from fairspec_metadata.actions.profile.load import load_profile +from fairspec_metadata.models.data_schema import DataSchema +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.profile import ProfileType +from fairspec_metadata.models.report import Report + + +class DataSchemaValidationResult(Report): + data_schema: DataSchema | None + + +def validate_data_schema( + source: Descriptor | str, + *, + root_json_pointer: str | None = None, +) -> DataSchemaValidationResult: + descriptor = load_descriptor(source) if isinstance(source, str) else source + + schema = descriptor.get("$schema") + schema_url = ( + schema + if isinstance(schema, str) + else "https://fairspec.org/profiles/latest/data-schema.json" + ) + + profile = load_profile(schema_url, profile_type=ProfileType.data_schema) + + report = validate_descriptor( + 
descriptor, + profile=profile, + root_json_pointer=root_json_pointer, + ) + + data_schema: DataSchema | None = None + if report.valid: + data_schema = descriptor + + return DataSchemaValidationResult( + valid=report.valid, + errors=report.errors, + data_schema=data_schema, + ) diff --git a/metadata/fairspec_metadata/actions/dataset/assert_.py b/metadata/fairspec_metadata/actions/dataset/assert_.py new file mode 100644 index 0000000..255e96d --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/assert_.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from fairspec_metadata.models.dataset import Dataset +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.exception import FairspecException + +from .validate import validate_dataset_descriptor + + +def assert_dataset(source: Descriptor, *, basepath: str | None = None) -> Dataset: + result = validate_dataset_descriptor(source, basepath=basepath) + + if not result.dataset: + raise FairspecException("Invalid Dataset", report=result) + + return result.dataset diff --git a/metadata/fairspec_metadata/actions/dataset/denormalize.py b/metadata/fairspec_metadata/actions/dataset/denormalize.py new file mode 100644 index 0000000..38108a4 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/denormalize.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from fairspec_metadata.actions.resource.denormalize import denormalize_resource +from fairspec_metadata.models.dataset import Dataset + + +def denormalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset: + result = dataset.model_dump(by_alias=True, exclude_none=True) + + if dataset.resources: + result["resources"] = [ + denormalize_resource(resource, basepath=basepath) + for resource in dataset.resources + ] + + return Dataset(**result) diff --git a/metadata/fairspec_metadata/actions/dataset/fixtures/dataset-invalid.json 
b/metadata/fairspec_metadata/actions/dataset/fixtures/dataset-invalid.json new file mode 100644 index 0000000..d3c0052 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/fixtures/dataset-invalid.json @@ -0,0 +1,8 @@ +{ + "resources": [ + { + "name": 1, + "data": "table.csv" + } + ] +} diff --git a/metadata/fairspec_metadata/actions/dataset/fixtures/dataset.json b/metadata/fairspec_metadata/actions/dataset/fixtures/dataset.json new file mode 100644 index 0000000..3397391 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/fixtures/dataset.json @@ -0,0 +1,66 @@ +{ + "$schema": "https://fairspec.org/profiles/latest/dataset.json", + "titles": [ + { + "title": "title" + } + ], + "creators": [ + { + "name": "title" + } + ], + "publisher": { + "name": "publisher" + }, + "publicationYear": "2017", + "subjects": [ + { + "subject": "keyword1" + }, + { + "subject": "keyword2" + } + ], + "contributors": [ + { + "name": "title", + "contributorType": "ContactPerson" + } + ], + "dates": [ + { + "date": "2017-01-01", + "dateType": "Created" + } + ], + "version": "1.0", + "rightsList": [ + { + "rights": "MIT" + } + ], + "descriptions": [ + { + "description": "description", + "descriptionType": "Abstract" + } + ], + "relatedIdentifiers": [ + { + "relatedIdentifier": "http://example.com", + "relatedIdentifierType": "URL", + "relationType": "IsDescribedBy" + } + ], + "resources": [ + { + "name": "name", + "data": "table.csv", + "fileDialect": { + "format": "csv" + }, + "tableSchema": "schema.json" + } + ] +} diff --git a/metadata/fairspec_metadata/actions/dataset/fixtures/schema.json b/metadata/fairspec_metadata/actions/dataset/fixtures/schema.json new file mode 100644 index 0000000..55af1b1 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/fixtures/schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://fairspec.org/profiles/latest/table-schema.json", + "required": ["id", "name"], + "properties": { + "id": { + "type": "integer" + }, + "name": { + 
"type": "string" + } + } +} diff --git a/metadata/fairspec_metadata/actions/dataset/fixtures/table.csv b/metadata/fairspec_metadata/actions/dataset/fixtures/table.csv new file mode 100644 index 0000000..84b1ed5 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/fixtures/table.csv @@ -0,0 +1,3 @@ +id,name +1,english +2,中文 diff --git a/metadata/fairspec_metadata/actions/dataset/load.py b/metadata/fairspec_metadata/actions/dataset/load.py new file mode 100644 index 0000000..4b795b0 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/load.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from fairspec_metadata.actions.descriptor.load import load_descriptor +from fairspec_metadata.actions.path.basepath import resolve_basepath +from fairspec_metadata.models.dataset import Dataset + +from .assert_ import assert_dataset + + +def load_dataset_descriptor(path: str) -> Dataset: + basepath = resolve_basepath(path) + descriptor = load_descriptor(path) + return assert_dataset(descriptor, basepath=basepath) diff --git a/metadata/fairspec_metadata/actions/dataset/normalize.py b/metadata/fairspec_metadata/actions/dataset/normalize.py new file mode 100644 index 0000000..b567711 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/normalize.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from fairspec_metadata.actions.resource.normalize import normalize_resource +from fairspec_metadata.models.dataset import Dataset + + +def normalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset: + result = dataset.model_dump(by_alias=True, exclude_none=True) + + if dataset.resources: + result["resources"] = [ + normalize_resource(resource, basepath=basepath) + for resource in dataset.resources + ] + + return Dataset(**result) diff --git a/metadata/fairspec_metadata/actions/dataset/save.py b/metadata/fairspec_metadata/actions/dataset/save.py new file mode 100644 index 0000000..9624110 --- /dev/null +++ 
b/metadata/fairspec_metadata/actions/dataset/save.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata.actions.descriptor.save import save_descriptor +from fairspec_metadata.actions.path.basepath import get_basepath +from fairspec_metadata.settings import FAIRSPEC_VERSION + +from .denormalize import denormalize_dataset + +if TYPE_CHECKING: + from fairspec_metadata.models.dataset import Dataset + + +def save_dataset_descriptor( + dataset: Dataset, + *, + path: str, + overwrite: bool = False, +) -> None: + basepath = get_basepath(path) + denormalized = denormalize_dataset(dataset, basepath=basepath) + descriptor = denormalized.model_dump(by_alias=True, exclude_none=True) + + if "$schema" not in descriptor: + descriptor["$schema"] = ( + f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json" + ) + + save_descriptor(descriptor, path=path, overwrite=overwrite) diff --git a/metadata/fairspec_metadata/actions/dataset/save_spec.py b/metadata/fairspec_metadata/actions/dataset/save_spec.py new file mode 100644 index 0000000..8290432 --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/save_spec.py @@ -0,0 +1,102 @@ +import json + +import pytest + +from fairspec_metadata.models.datacite.creator import Creator +from fairspec_metadata.models.datacite.title import Title +from fairspec_metadata.models.dataset import Dataset +from fairspec_metadata.models.resource import Resource +from fairspec_metadata.settings import FAIRSPEC_VERSION + +from .save import save_dataset_descriptor + + +class TestSaveDatasetDescriptor: + def test_saves_dataset(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset = Dataset( + creators=[Creator(name="Test Creator")], + titles=[Title(title="Test Dataset")], + resources=[ + Resource(name="test_resource", data=str(tmp_path / "data.csv")), + ], + ) + save_dataset_descriptor(dataset, path=path) + with open(path, encoding="utf-8") as f: + content = json.load(f) 
+ assert content["$schema"].endswith("dataset.json") + assert content["creators"][0]["name"] == "Test Creator" + assert content["resources"][0]["name"] == "test_resource" + + def test_sets_default_schema(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset = Dataset( + resources=[ + Resource(data=str(tmp_path / "data.csv")), + ], + ) + save_dataset_descriptor(dataset, path=path) + with open(path, encoding="utf-8") as f: + content = json.load(f) + expected = f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json" + assert content["$schema"] == expected + + def test_preserves_custom_schema(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset = Dataset( + profile="https://custom.schema.url/dataset.json", + resources=[ + Resource(data=str(tmp_path / "data.csv")), + ], + ) + save_dataset_descriptor(dataset, path=path) + with open(path, encoding="utf-8") as f: + content = json.load(f) + assert content["$schema"] == "https://custom.schema.url/dataset.json" + + def test_throws_when_file_exists(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset = Dataset(resources=[Resource(data=str(tmp_path / "data.csv"))]) + save_dataset_descriptor(dataset, path=path) + with pytest.raises(FileExistsError): + save_dataset_descriptor(dataset, path=path) + + def test_overwrites_when_flag_set(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset1 = Dataset( + creators=[Creator(name="Initial")], + resources=[Resource(data=str(tmp_path / "data.csv"))], + ) + dataset2 = Dataset( + creators=[Creator(name="Updated")], + resources=[Resource(data=str(tmp_path / "data.csv"))], + ) + save_dataset_descriptor(dataset1, path=path) + save_dataset_descriptor(dataset2, path=path, overwrite=True) + with open(path, encoding="utf-8") as f: + content = json.load(f) + assert content["creators"][0]["name"] == "Updated" + + def test_saves_to_nested_directory(self, tmp_path): + path = str(tmp_path / "nested" / "dir" / "dataset.json") + dataset = 
Dataset( + resources=[ + Resource(data=str(tmp_path / "nested" / "dir" / "data.csv")), + ], + ) + save_dataset_descriptor(dataset, path=path) + with open(path, encoding="utf-8") as f: + content = json.load(f) + assert "resources" in content + + def test_denormalizes_resource_paths(self, tmp_path): + path = str(tmp_path / "dataset.json") + dataset = Dataset( + resources=[ + Resource(name="test", data=str(tmp_path / "data.csv")), + ], + ) + save_dataset_descriptor(dataset, path=path) + with open(path, encoding="utf-8") as f: + content = json.load(f) + assert content["resources"][0]["data"] == "data.csv" diff --git a/metadata/fairspec_metadata/actions/dataset/validate.py b/metadata/fairspec_metadata/actions/dataset/validate.py new file mode 100644 index 0000000..4c4e3ef --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/validate.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from fairspec_metadata.actions.data_schema.validate import validate_data_schema +from fairspec_metadata.actions.descriptor.load import load_descriptor +from fairspec_metadata.actions.descriptor.validate import validate_descriptor +from fairspec_metadata.actions.file_dialect.validate import validate_file_dialect +from fairspec_metadata.actions.profile.load import load_profile +from fairspec_metadata.actions.table_schema.validate import validate_table_schema +from fairspec_metadata.models.dataset import Dataset +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.profile import ProfileType +from fairspec_metadata.models.report import Report + +from .normalize import normalize_dataset + + +class DatasetValidationResult(Report): + dataset: Dataset | None = None + + +def validate_dataset_descriptor( + source: Descriptor | str, + *, + basepath: str | None = None, +) -> DatasetValidationResult: + descriptor = load_descriptor(source) if isinstance(source, str) else source + + schema = descriptor.get("$schema") + schema_url = ( + schema + if 
isinstance(schema, str) + else "https://fairspec.org/profiles/latest/dataset.json" + ) + + profile = load_profile(schema_url, profile_type=ProfileType.dataset) + + report = validate_descriptor(descriptor, profile=profile) + + normalized: Dataset | None = None + if report.valid: + # Valid -> we can cast + normalized = normalize_dataset(Dataset(**descriptor), basepath=basepath) + + if normalized: + for index, resource in enumerate(normalized.resources or []): + root_json_pointer = f"/resources/{index}" + + if isinstance(resource.fileDialect, str): + file_dialect_result = validate_file_dialect( + resource.fileDialect, + root_json_pointer=root_json_pointer, + ) + report.errors.extend(file_dialect_result.errors) + + if isinstance(resource.dataSchema, str): + data_schema_result = validate_data_schema( + resource.dataSchema, + root_json_pointer=root_json_pointer, + ) + report.errors.extend(data_schema_result.errors) + + if isinstance(resource.tableSchema, str): + table_schema_result = validate_table_schema( + resource.tableSchema, + root_json_pointer=root_json_pointer, + ) + report.errors.extend(table_schema_result.errors) + + if report.errors: + normalized = None + report.valid = False + + return DatasetValidationResult( + valid=report.valid, + errors=report.errors, + dataset=normalized, + ) diff --git a/metadata/fairspec_metadata/actions/dataset/validate_spec.py b/metadata/fairspec_metadata/actions/dataset/validate_spec.py new file mode 100644 index 0000000..fb0169d --- /dev/null +++ b/metadata/fairspec_metadata/actions/dataset/validate_spec.py @@ -0,0 +1,30 @@ +from .validate import validate_dataset_descriptor + + +class TestValidateDatasetDescriptor: + def test_valid_dataset(self): + dataset = {"resources": [{"data": "data.csv"}]} + result = validate_dataset_descriptor(dataset) + assert result.valid is True + assert result.errors == [] + + def test_invalid_dataset(self): + dataset = {"resources": "not-an-array"} + result = validate_dataset_descriptor(dataset) + assert 
result.valid is False + assert len(result.errors) > 0 + + def test_missing_schema_is_valid(self): + dataset = {"resources": [{"data": "data.csv"}]} + result = validate_dataset_descriptor(dataset) + assert result.valid is True + + def test_dataset_with_datacite(self): + dataset = { + "creators": [{"name": "John Doe"}], + "titles": [{"title": "Example Dataset"}], + "resources": [{"data": "data.csv"}], + } + result = validate_dataset_descriptor(dataset) + assert result.valid is True + assert result.errors == [] diff --git a/metadata/fairspec_metadata/actions/descriptor/copy.py b/metadata/fairspec_metadata/actions/descriptor/copy.py new file mode 100644 index 0000000..6bda707 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/copy.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import copy + +from fairspec_metadata.models.descriptor import Descriptor + + +def copy_descriptor(descriptor: Descriptor) -> Descriptor: + return copy.deepcopy(descriptor) diff --git a/metadata/fairspec_metadata/actions/descriptor/fixtures/schema.json b/metadata/fairspec_metadata/actions/descriptor/fixtures/schema.json new file mode 100644 index 0000000..ebf1483 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/fixtures/schema.json @@ -0,0 +1 @@ +{"fields": [{"name": "id", "type": "integer"}, {"name": "name", "type": "string"}]} \ No newline at end of file diff --git a/metadata/fairspec_metadata/actions/descriptor/general.py b/metadata/fairspec_metadata/actions/descriptor/general.py new file mode 100644 index 0000000..95412ac --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/general.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from typing import Any + + +def get_is_descriptor(value: Any) -> bool: + return isinstance(value, dict) diff --git a/metadata/fairspec_metadata/actions/descriptor/load.py b/metadata/fairspec_metadata/actions/descriptor/load.py new file mode 100644 index 0000000..a5eff24 --- /dev/null +++ 
b/metadata/fairspec_metadata/actions/descriptor/load.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import json +import urllib.request + +from .parse import parse_descriptor +from fairspec_metadata.actions.path.general import ( + get_file_protocol, + get_is_remote_path, +) +from fairspec_metadata.models.descriptor import Descriptor + + +def load_descriptor( + path: str, + *, + only_remote: bool = False, +) -> Descriptor: + is_remote = get_is_remote_path(path) + + if not is_remote and only_remote: + raise Error("Cannot load descriptor for security reasons") + + if is_remote: + return _load_remote_descriptor(path) + return _load_local_descriptor(path) + + +class Error(Exception): + pass + + +def _load_local_descriptor(path: str) -> Descriptor: + with open(path, encoding="utf-8") as file: + text = file.read() + return parse_descriptor(text) + + +def _load_remote_descriptor(path: str) -> Descriptor: + protocol = get_file_protocol(path) + if protocol not in ("http", "https"): + raise Error(f"Unsupported remote protocol: {protocol}") + + with urllib.request.urlopen(path) as response: # noqa: S310 + descriptor: Descriptor = json.loads(response.read()) + + return descriptor diff --git a/metadata/fairspec_metadata/actions/descriptor/load_spec.py b/metadata/fairspec_metadata/actions/descriptor/load_spec.py new file mode 100644 index 0000000..884803a --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/load_spec.py @@ -0,0 +1,40 @@ +import json +import os +from unittest.mock import patch + +import pytest + +from .load import Error, load_descriptor + + +FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures") + + +class TestLoadDescriptor: + def test_load_local_descriptor(self): + path = os.path.join(FIXTURES_DIR, "schema.json") + descriptor = load_descriptor(path) + assert descriptor["fields"][0]["name"] == "id" + assert descriptor["fields"][1]["name"] == "name" + + def test_load_remote_descriptor(self): + expected = {"name": "test"} + 
response_bytes = json.dumps(expected).encode() + + with patch( + "fairspec_metadata.actions.descriptor.load.urllib.request.urlopen" + ) as mock: + mock.return_value.__enter__ = lambda s: s + mock.return_value.__exit__ = lambda s, *a: None + mock.return_value.read.return_value = response_bytes + descriptor = load_descriptor("https://example.com/test.json") + + assert descriptor == expected + + def test_load_remote_descriptor_bad_protocol(self): + with pytest.raises(Error, match="Unsupported remote protocol: ftp"): + load_descriptor("ftp://example.com/file.json") + + def test_only_remote_rejects_local(self): + with pytest.raises(Error, match="security"): + load_descriptor("local.json", only_remote=True) diff --git a/metadata/fairspec_metadata/actions/descriptor/parse.py b/metadata/fairspec_metadata/actions/descriptor/parse.py new file mode 100644 index 0000000..26d24e7 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/parse.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import json + +from fairspec_metadata.models.descriptor import Descriptor + + +class Error(Exception): + pass + + +def parse_descriptor(text: str) -> Descriptor: + value = json.loads(text) + if not isinstance(value, dict): + raise Error(f"Invalid descriptor: {text}") + return value diff --git a/metadata/fairspec_metadata/actions/descriptor/save.py b/metadata/fairspec_metadata/actions/descriptor/save.py new file mode 100644 index 0000000..574c6c9 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/save.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import os + +from .stringify import stringify_descriptor +from fairspec_metadata.models.descriptor import Descriptor + + +def save_descriptor( + descriptor: Descriptor, + *, + path: str, + overwrite: bool = False, +) -> None: + text = stringify_descriptor(descriptor) + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + mode = "w" if overwrite else "x" + with open(path, mode, encoding="utf-8") as 
file: + file.write(text) diff --git a/metadata/fairspec_metadata/actions/descriptor/save_spec.py b/metadata/fairspec_metadata/actions/descriptor/save_spec.py new file mode 100644 index 0000000..1c8e5f8 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/save_spec.py @@ -0,0 +1,47 @@ +import json + +import pytest + +from .save import save_descriptor + + +class TestSaveDescriptor: + def test_save_basic(self, tmp_path): + path = str(tmp_path / "output.json") + save_descriptor({"name": "test"}, path=path) + with open(path, encoding="utf-8") as f: + assert json.load(f) == {"name": "test"} + + def test_save_creates_nested_dirs(self, tmp_path): + path = str(tmp_path / "a" / "b" / "output.json") + save_descriptor({"name": "nested"}, path=path) + with open(path, encoding="utf-8") as f: + assert json.load(f) == {"name": "nested"} + + def test_save_uses_two_space_indent(self, tmp_path): + path = str(tmp_path / "output.json") + save_descriptor({"a": 1}, path=path) + with open(path, encoding="utf-8") as f: + text = f.read() + assert ' "a": 1' in text + + def test_save_exclusive_fails_on_existing(self, tmp_path): + path = str(tmp_path / "output.json") + save_descriptor({"a": 1}, path=path) + with pytest.raises(FileExistsError): + save_descriptor({"a": 2}, path=path) + + def test_save_overwrite_succeeds_on_existing(self, tmp_path): + path = str(tmp_path / "output.json") + save_descriptor({"a": 1}, path=path) + save_descriptor({"a": 2}, path=path, overwrite=True) + with open(path, encoding="utf-8") as f: + assert json.load(f) == {"a": 2} + + def test_save_file_content_is_valid_json(self, tmp_path): + path = str(tmp_path / "output.json") + descriptor = {"key": "value", "nested": {"a": [1, 2, 3]}} + save_descriptor(descriptor, path=path) + with open(path, encoding="utf-8") as f: + loaded = json.load(f) + assert loaded == descriptor diff --git a/metadata/fairspec_metadata/actions/descriptor/stringify.py b/metadata/fairspec_metadata/actions/descriptor/stringify.py new 
file mode 100644 index 0000000..50986e9 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/stringify.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import json + +from fairspec_metadata.models.descriptor import Descriptor + + +def stringify_descriptor(descriptor: Descriptor) -> str: + return json.dumps(descriptor, indent=2) diff --git a/metadata/fairspec_metadata/actions/descriptor/validate.py b/metadata/fairspec_metadata/actions/descriptor/validate.py new file mode 100644 index 0000000..7525fb0 --- /dev/null +++ b/metadata/fairspec_metadata/actions/descriptor/validate.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from fairspec_metadata.actions.json.inspect import inspect_json +from fairspec_metadata.actions.report.create import create_report +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.error.metadata import MetadataError +from fairspec_metadata.models.profile import Profile +from fairspec_metadata.models.report import Report + + +def validate_descriptor( + descriptor: Descriptor, + *, + profile: Profile, + root_json_pointer: str | None = None, +) -> Report: + errors = inspect_json( + descriptor, + json_schema=profile, + root_json_pointer=root_json_pointer, + ) + return create_report( + [ + MetadataError( + type="metadata", + message=error["message"], + jsonPointer=error["jsonPointer"], + ) + for error in errors + ] + ) diff --git a/metadata/fairspec_metadata/actions/file_dialect/assert_.py b/metadata/fairspec_metadata/actions/file_dialect/assert_.py new file mode 100644 index 0000000..a5eafcd --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/assert_.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.exception import FairspecException + +from .validate import validate_file_dialect + +if TYPE_CHECKING: + from 
fairspec_metadata.models.file_dialect.file_dialect import FileDialect + + +def assert_file_dialect(source: Descriptor) -> FileDialect: + result = validate_file_dialect(source) + + if not result.file_dialect: + raise FairspecException("Invalid dialect", report=result) + + return result.file_dialect diff --git a/metadata/fairspec_metadata/actions/file_dialect/infer.py b/metadata/fairspec_metadata/actions/file_dialect/infer.py new file mode 100644 index 0000000..73888f1 --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/infer.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata.actions.path.general import get_file_extension +from fairspec_metadata.actions.resource.data import get_data_first_path + +if TYPE_CHECKING: + from fairspec_metadata.models.resource import Resource + +_EXTENSION_TO_FORMAT: dict[str, str] = { + "csv": "csv", + "tsv": "tsv", + "json": "json", + "jsonl": "jsonl", + "ndjson": "jsonl", + "xlsx": "xlsx", + "ods": "ods", + "parquet": "parquet", + "arrow": "arrow", + "feather": "arrow", + "sqlite": "sqlite", +} + + +def infer_file_dialect_format(resource: Resource) -> str | None: + path = get_data_first_path(resource) + if not path: + return None + + extension = get_file_extension(path) + if not extension: + return None + + return _EXTENSION_TO_FORMAT.get(extension) diff --git a/metadata/fairspec_metadata/actions/file_dialect/infer_spec.py b/metadata/fairspec_metadata/actions/file_dialect/infer_spec.py new file mode 100644 index 0000000..45435f1 --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/infer_spec.py @@ -0,0 +1,44 @@ +from fairspec_metadata import Resource + +from .infer import infer_file_dialect_format + + +class TestInferFileDialectFormat: + def test_csv(self): + assert infer_file_dialect_format(Resource(data="table.csv")) == "csv" + + def test_tsv(self): + assert infer_file_dialect_format(Resource(data="table.tsv")) == "tsv" + + def 
test_json(self): + assert infer_file_dialect_format(Resource(data="table.json")) == "json" + + def test_jsonl(self): + assert infer_file_dialect_format(Resource(data="table.jsonl")) == "jsonl" + + def test_ndjson_maps_to_jsonl(self): + assert infer_file_dialect_format(Resource(data="table.ndjson")) == "jsonl" + + def test_xlsx(self): + assert infer_file_dialect_format(Resource(data="table.xlsx")) == "xlsx" + + def test_ods(self): + assert infer_file_dialect_format(Resource(data="table.ods")) == "ods" + + def test_parquet(self): + assert infer_file_dialect_format(Resource(data="table.parquet")) == "parquet" + + def test_arrow(self): + assert infer_file_dialect_format(Resource(data="table.arrow")) == "arrow" + + def test_feather_maps_to_arrow(self): + assert infer_file_dialect_format(Resource(data="table.feather")) == "arrow" + + def test_sqlite(self): + assert infer_file_dialect_format(Resource(data="table.sqlite")) == "sqlite" + + def test_unknown_extension(self): + assert infer_file_dialect_format(Resource(data="table.xyz")) is None + + def test_no_data(self): + assert infer_file_dialect_format(Resource()) is None diff --git a/metadata/fairspec_metadata/actions/file_dialect/load.py b/metadata/fairspec_metadata/actions/file_dialect/load.py new file mode 100644 index 0000000..9faa669 --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/load.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata.actions.descriptor.load import load_descriptor + +from .assert_ import assert_file_dialect + +if TYPE_CHECKING: + from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + + +def load_file_dialect(path: str) -> FileDialect: + descriptor = load_descriptor(path) + return assert_file_dialect(descriptor) diff --git a/metadata/fairspec_metadata/actions/file_dialect/resolve.py b/metadata/fairspec_metadata/actions/file_dialect/resolve.py new file mode 100644 index 0000000..0387e17 --- 
/dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/resolve.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .load import load_file_dialect + +if TYPE_CHECKING: + from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + + +def resolve_file_dialect( + file_dialect: FileDialect | str | None = None, +) -> FileDialect | None: + if file_dialect is None: + return None + + if not isinstance(file_dialect, str): + return file_dialect + + return load_file_dialect(file_dialect) diff --git a/metadata/fairspec_metadata/actions/file_dialect/save.py b/metadata/fairspec_metadata/actions/file_dialect/save.py new file mode 100644 index 0000000..fd1dc4d --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/save.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from fairspec_metadata.actions.descriptor.save import save_descriptor +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect +from fairspec_metadata.settings import FAIRSPEC_VERSION + + +def save_file_dialect( + file_dialect: FileDialect, + *, + path: str, + overwrite: bool = False, +) -> None: + descriptor = file_dialect.model_dump(by_alias=True, exclude_none=True) + + if "$schema" not in descriptor: + descriptor["$schema"] = ( + f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/file-dialect.json" + ) + + save_descriptor(descriptor, path=path, overwrite=overwrite) diff --git a/metadata/fairspec_metadata/actions/file_dialect/support.py b/metadata/fairspec_metadata/actions/file_dialect/support.py new file mode 100644 index 0000000..5c4194b --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/support.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pydantic import TypeAdapter + +from fairspec_metadata.actions.resource.data import get_data_path +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from .infer import 
infer_file_dialect_format +from .resolve import resolve_file_dialect + +if TYPE_CHECKING: + from fairspec_metadata.models.resource import Resource + +_file_dialect_adapter = TypeAdapter(FileDialect) + + +def get_supported_file_dialect( + resource: Resource, supported_formats: list[str] +) -> FileDialect | None: + data_path = get_data_path(resource) + if not data_path: + return None + + resolved = resolve_file_dialect(resource.fileDialect) + if resolved is None: + format = infer_file_dialect_format(resource) + if format: + resolved = _file_dialect_adapter.validate_python({"format": format}) + + if resolved is None: + return None + + format_value = getattr(resolved, "format", None) + + for supported_format in supported_formats: + if format_value == supported_format: + return resolved + + return None diff --git a/metadata/fairspec_metadata/actions/file_dialect/validate.py b/metadata/fairspec_metadata/actions/file_dialect/validate.py new file mode 100644 index 0000000..65f7f51 --- /dev/null +++ b/metadata/fairspec_metadata/actions/file_dialect/validate.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from pydantic import TypeAdapter + +from fairspec_metadata.actions.descriptor.load import load_descriptor +from fairspec_metadata.actions.descriptor.validate import validate_descriptor +from fairspec_metadata.actions.profile.load import load_profile +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect +from fairspec_metadata.models.profile import ProfileType +from fairspec_metadata.models.report import Report + + +class FileDialectValidationResult(Report): + file_dialect: FileDialect | None + + +def validate_file_dialect( + source: Descriptor | str, + *, + root_json_pointer: str | None = None, +) -> FileDialectValidationResult: + descriptor = load_descriptor(source) if isinstance(source, str) else source + + schema = descriptor.get("$schema") + schema_url = ( + schema + if 
isinstance(schema, str) + else "https://fairspec.org/profiles/latest/file-dialect.json" + ) + + profile = load_profile(schema_url, profile_type=ProfileType.file_dialect) + + report = validate_descriptor( + descriptor, + profile=profile, + root_json_pointer=root_json_pointer, + ) + + file_dialect: FileDialect | None = None + if report.valid: + # Valid -> we can cast + file_dialect = TypeAdapter(FileDialect).validate_python(descriptor) + + return FileDialectValidationResult( + valid=report.valid, + errors=report.errors, + file_dialect=file_dialect, + ) diff --git a/metadata/fairspec_metadata/actions/json/inspect.py b/metadata/fairspec_metadata/actions/json/inspect.py new file mode 100644 index 0000000..1c2f870 --- /dev/null +++ b/metadata/fairspec_metadata/actions/json/inspect.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import Any + +import referencing +from jsonschema import Draft202012Validator + +from fairspec_metadata.actions.json_schema.load import load_json_schema +from fairspec_metadata.models.json_schema import JsonSchema + + +def inspect_json( + value: Any, + *, + json_schema: JsonSchema | str, + root_json_pointer: str | None = None, +) -> list[dict[str, str]]: + if isinstance(json_schema, str): + json_schema = load_json_schema(json_schema) + + registry = referencing.Registry(retrieve=_retrieve) # type: ignore[call-arg] + validator = Draft202012Validator( + json_schema, registry=registry, format_checker=None + ) + + errors: list[dict[str, str]] = [] + for error in validator.iter_errors(value): + instance_path = _deque_to_json_pointer(error.absolute_path) + root_path = root_json_pointer or "" + json_pointer = _combine_json_pointers(root_path, instance_path) + + errors.append( + { + "message": error.message, + "jsonPointer": json_pointer, + } + ) + + return errors + + +def _retrieve(uri: str) -> referencing.Resource: + schema = load_json_schema(uri, only_remote=True) + return referencing.Resource.from_contents(schema) + + +def 
_deque_to_json_pointer(path: object) -> str: + parts = list(path) # type: ignore[arg-type] + if not parts: + return "/" + return "/" + "/".join(str(p) for p in parts) + + +def _combine_json_pointers(root: str, instance: str) -> str: + if root == "" or root == "/": + return instance + if instance == "/": + return root + return root + instance diff --git a/metadata/fairspec_metadata/actions/json/inspect_spec.py b/metadata/fairspec_metadata/actions/json/inspect_spec.py new file mode 100644 index 0000000..e0a9498 --- /dev/null +++ b/metadata/fairspec_metadata/actions/json/inspect_spec.py @@ -0,0 +1,49 @@ +from .inspect import inspect_json + + +SCHEMA = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "city": {"type": "string"}, + }, + "required": ["city"], + }, + }, + "required": ["name"], +} + + +class TestInspectJson: + def test_valid_value(self): + errors = inspect_json({"name": "test"}, json_schema=SCHEMA) + assert errors == [] + + def test_type_mismatch(self): + errors = inspect_json({"name": 123}, json_schema=SCHEMA) + assert len(errors) == 1 + assert errors[0]["jsonPointer"] == "/name" + + def test_missing_required(self): + errors = inspect_json({}, json_schema=SCHEMA) + assert len(errors) == 1 + assert "name" in errors[0]["message"] + + def test_nested_validation(self): + errors = inspect_json( + {"name": "test", "address": {}}, + json_schema=SCHEMA, + ) + assert len(errors) == 1 + assert errors[0]["jsonPointer"] == "/address" + + def test_multiple_errors(self): + errors = inspect_json( + {"name": 123, "age": "not_a_number"}, + json_schema=SCHEMA, + ) + assert len(errors) == 2 diff --git a/metadata/fairspec_metadata/actions/json_schema/assert_.py b/metadata/fairspec_metadata/actions/json_schema/assert_.py new file mode 100644 index 0000000..09ebd16 --- /dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/assert_.py @@ -0,0 +1,21 @@ +from 
__future__ import annotations + +import json + +from .inspect import inspect_json_schema +from fairspec_metadata.models.descriptor import Descriptor +from fairspec_metadata.models.json_schema import JsonSchema + + +class Error(Exception): + pass + + +def assert_json_schema(descriptor: Descriptor) -> JsonSchema: + errors = inspect_json_schema(descriptor) + + if errors: + preview = json.dumps(descriptor)[:100] + raise Error(f'JsonSchema "{preview}" is not valid') + + return descriptor diff --git a/metadata/fairspec_metadata/actions/json_schema/inspect.py b/metadata/fairspec_metadata/actions/json_schema/inspect.py new file mode 100644 index 0000000..c873971 --- /dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/inspect.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from jsonschema import Draft202012Validator + +from fairspec_metadata.models.descriptor import Descriptor + + +def inspect_json_schema( + descriptor: Descriptor, + *, + root_json_pointer: str | None = None, +) -> list[dict[str, str]]: + errors: list[dict[str, str]] = [] + + validator = Draft202012Validator(Draft202012Validator.META_SCHEMA) + for error in validator.iter_errors(descriptor): + instance_path = _deque_to_json_pointer(error.absolute_path) + root_path = root_json_pointer or "" + json_pointer = _combine_json_pointers(root_path, instance_path) + + errors.append( + { + "message": error.message, + "jsonPointer": json_pointer, + } + ) + + return errors + + +def _deque_to_json_pointer(path: object) -> str: + parts = list(path) # type: ignore[arg-type] + if not parts: + return "/" + return "/" + "/".join(str(p) for p in parts) + + +def _combine_json_pointers(root: str, instance: str) -> str: + if root == "" or root == "/": + return instance + if instance == "/": + return root + return root + instance diff --git a/metadata/fairspec_metadata/actions/json_schema/inspect_spec.py b/metadata/fairspec_metadata/actions/json_schema/inspect_spec.py new file mode 100644 index 
0000000..27322e8 --- /dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/inspect_spec.py @@ -0,0 +1,68 @@ +from .inspect import inspect_json_schema + + +class TestInspectJsonSchemaValid: + def test_valid_empty_object_schema(self): + errors = inspect_json_schema({"type": "object"}) + assert errors == [] + + def test_valid_string_schema(self): + errors = inspect_json_schema({"type": "string"}) + assert errors == [] + + def test_valid_schema_with_properties(self): + errors = inspect_json_schema( + { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + ) + assert errors == [] + + def test_valid_schema_with_required(self): + errors = inspect_json_schema( + { + "type": "object", + "required": ["name"], + "properties": {"name": {"type": "string"}}, + } + ) + assert errors == [] + + def test_valid_array_schema(self): + errors = inspect_json_schema( + { + "type": "array", + "items": {"type": "integer"}, + } + ) + assert errors == [] + + def test_valid_draft_2020_12_schema(self): + errors = inspect_json_schema( + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + } + ) + assert errors == [] + + +class TestInspectJsonSchemaInvalid: + def test_invalid_type_value(self): + errors = inspect_json_schema({"type": "invalid"}) + assert len(errors) > 0 + assert errors[0]["jsonPointer"] == "/type" + + def test_invalid_required_not_array(self): + errors = inspect_json_schema({"required": "name"}) + assert len(errors) > 0 + + def test_invalid_properties_not_object(self): + errors = inspect_json_schema({"properties": "invalid"}) + assert len(errors) > 0 + + def test_root_json_pointer_combined(self): + errors = inspect_json_schema({"type": "invalid"}, root_json_pointer="/root") + assert len(errors) > 0 + assert errors[0]["jsonPointer"] == "/root/type" diff --git a/metadata/fairspec_metadata/actions/json_schema/load.py b/metadata/fairspec_metadata/actions/json_schema/load.py new file mode 100644 index 0000000..6de6188 --- 
/dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/load.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from functools import lru_cache + +from fairspec_metadata.actions.descriptor.load import load_descriptor +from .assert_ import assert_json_schema +from fairspec_metadata.models.json_schema import JsonSchema + + +@lru_cache(maxsize=100) +def load_json_schema( + path: str, + *, + only_remote: bool = False, +) -> JsonSchema: + from fairspec_metadata.actions.profile.registry import profile_registry + + for item in profile_registry: + if item.path == path: + return item.profile + + descriptor = load_descriptor(path, only_remote=only_remote) + return assert_json_schema(descriptor) diff --git a/metadata/fairspec_metadata/actions/json_schema/resolve.py b/metadata/fairspec_metadata/actions/json_schema/resolve.py new file mode 100644 index 0000000..c6f48cd --- /dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/resolve.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from .load import load_json_schema +from fairspec_metadata.models.json_schema import JsonSchema + + +def resolve_json_schema( + json_schema: JsonSchema | str | None = None, +) -> JsonSchema | None: + if json_schema is None: + return None + + if isinstance(json_schema, str): + return load_json_schema(json_schema) + + return json_schema diff --git a/metadata/fairspec_metadata/actions/json_schema/save.py b/metadata/fairspec_metadata/actions/json_schema/save.py new file mode 100644 index 0000000..76c593f --- /dev/null +++ b/metadata/fairspec_metadata/actions/json_schema/save.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from fairspec_metadata.actions.descriptor.save import save_descriptor +from fairspec_metadata.models.json_schema import JsonSchema + + +def save_json_schema( + json_schema: JsonSchema, + *, + path: str, + overwrite: bool = False, +) -> None: + save_descriptor(json_schema, path=path, overwrite=overwrite) diff --git 
a/metadata/fairspec_metadata/actions/path/basepath.py b/metadata/fairspec_metadata/actions/path/basepath.py new file mode 100644 index 0000000..5d0708b --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/basepath.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import os +import urllib.parse +import urllib.request + +from .general import get_is_remote_path, safe_relpath + + +def get_basepath(path: str) -> str: + if get_is_remote_path(path): + parsed = urllib.parse.urlparse(path) + url_path = parsed.path or "/" + url = urllib.parse.urlunparse( + ( + parsed.scheme, + parsed.netloc, + url_path, + parsed.params, + parsed.query, + parsed.fragment, + ) + ) + return "/".join(url.split("/")[:-1]) + + resolved = os.path.abspath(path) + parent = os.path.dirname(resolved) + rel = safe_relpath(parent) + return "" if rel == "." else rel + + +def resolve_basepath(path: str) -> str: + if get_is_remote_path(path): + request = urllib.request.Request(path, method="HEAD") + with urllib.request.urlopen(request) as response: + path = response.url + return get_basepath(path) diff --git a/metadata/fairspec_metadata/actions/path/basepath_spec.py b/metadata/fairspec_metadata/actions/path/basepath_spec.py new file mode 100644 index 0000000..98c8146 --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/basepath_spec.py @@ -0,0 +1,59 @@ +import os + +import pytest + +from .basepath import get_basepath + + +class TestGetBasepath: + @pytest.mark.parametrize( + "path, expected", + [ + ( + "http://example.com/path/to/file.txt", + "http://example.com/path/to", + ), + ( + "https://example.com/path/to/file.txt", + "https://example.com/path/to", + ), + ( + "https://example.com/path/to/file.txt?query=param", + "https://example.com/path/to", + ), + ( + "https://example.com/path/to/file.txt#section", + "https://example.com/path/to", + ), + ( + "https://example.com/path/to/", + "https://example.com/path/to", + ), + ( + "https://example.com", + "https://example.com", + ), + ( + 
"some/path/to/file.txt", + os.path.join("some", "path", "to"), + ), + ( + "some/path/to/", + os.path.join("some", "path"), + ), + ("file.txt", ""), + ], + ids=[ + "http URL with file", + "https URL with file", + "URL with query parameters", + "URL with hash", + "URL with no file", + "URL with only domain", + "local file path", + "local path with no file", + "root level file", + ], + ) + def test_get_basepath(self, path: str, expected: str): + assert get_basepath(path) == expected diff --git a/metadata/fairspec_metadata/actions/path/denormalize.py b/metadata/fairspec_metadata/actions/path/denormalize.py new file mode 100644 index 0000000..0dbfffa --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/denormalize.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import os +import urllib.parse + +from .general import get_is_remote_path + + +def denormalize_path(path: str, *, basepath: str | None = None) -> str: + is_path_remote = get_is_remote_path(path) + is_basepath_remote = get_is_remote_path(basepath or "") + + if is_path_remote: + parsed = urllib.parse.urlparse(path) + return urllib.parse.urlunparse( + ( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) + ) + + if is_basepath_remote: + normalized_basepath = urllib.parse.urlparse(basepath or "").geturl() + if not path.startswith(normalized_basepath): + raise Error(path, basepath) + return path.removeprefix(f"{normalized_basepath}/") + + normalized_path = os.path.abspath(path) + normalized_basepath = os.path.abspath(basepath or "") + if not normalized_path.startswith(normalized_basepath): + raise Error(path, basepath) + + relative = os.path.relpath(normalized_path, normalized_basepath) + return relative.replace(os.sep, "/") + + +class Error(Exception): + def __init__(self, path: str, basepath: str | None): + super().__init__(f"Path {path} is not a subpath of {basepath}") diff --git a/metadata/fairspec_metadata/actions/path/denormalize_spec.py 
b/metadata/fairspec_metadata/actions/path/denormalize_spec.py new file mode 100644 index 0000000..2475880 --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/denormalize_spec.py @@ -0,0 +1,56 @@ +import pytest + +from .denormalize import denormalize_path + + +class TestDenormalizePath: + @pytest.mark.parametrize( + "path, basepath, expected", + [ + ( + "http://example.com/path/to/file.txt", + None, + "http://example.com/path/to/file.txt", + ), + ( + "http://example.com/path/to/file.txt", + "data", + "http://example.com/path/to/file.txt", + ), + ( + "/tmp/data/file.csv", + "/tmp", + "data/file.csv", + ), + ( + "/tmp/file.csv", + "/tmp", + "file.csv", + ), + ( + "/tmp/data/nested/deep/file.csv", + "/tmp/data/nested", + "deep/file.csv", + ), + ( + "/home/user/projects/data/file.csv", + "/home/user/projects", + "data/file.csv", + ), + ], + ids=[ + "remote URL without basepath", + "remote URL with basepath", + "local file in subfolder", + "local file in direct child folder", + "local file with deeply nested basepath", + "local file with multi-level basepath", + ], + ) + def test_denormalize_path( + self, + path: str, + basepath: str | None, + expected: str, + ): + assert denormalize_path(path, basepath=basepath) == expected diff --git a/metadata/fairspec_metadata/actions/path/general.py b/metadata/fairspec_metadata/actions/path/general.py new file mode 100644 index 0000000..8975ef4 --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/general.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import os +import re +import urllib.parse + + +def safe_relpath(path: str, start: str | None = None) -> str: + try: + return os.path.relpath(path) if start is None else os.path.relpath(path, start) + except ValueError: + return os.path.abspath(path) + + +def get_file_protocol(path: str) -> str: + try: + parsed = urllib.parse.urlparse(path) + protocol = parsed.scheme + if len(protocol) < 2: + return "file" + return protocol + except Exception: + return "file" + 
+ +def get_is_remote_path(path: str) -> bool: + return get_file_protocol(path) != "file" + + +def get_file_name(path: str) -> str | None: + if get_is_remote_path(path): + pathname = urllib.parse.urlparse(path).path + file_name = pathname.split("/")[-1] + return file_name if file_name and "." in file_name else None + + resolved = os.path.abspath(path) + file_name = os.path.basename(resolved) + return file_name if file_name and "." in file_name else None + + +def get_file_extension(path: str) -> str | None: + file_name = get_file_name(path) + if not file_name: + return None + extension = file_name.split(".")[-1] + if file_name == f".{extension}": + return None + return extension + + +def get_file_basename(path: str) -> str | None: + file_name = get_file_name(path) + extension = get_file_extension(path) + if extension and file_name: + return file_name.removesuffix(f".{extension}") + return file_name + + +def get_file_name_slug(path: str) -> str | None: + basename = get_file_basename(path) + if not basename: + return None + return _slugify(basename) + + +def _slugify(text: str) -> str: + text = re.sub(r"([a-z])([A-Z])", r"\1_\2", text) + text = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", text) + slug = re.sub(r"[^a-z0-9]+", "_", text.lower()).strip("_") + slug = re.sub(r"_+", "_", slug) + return re.sub(r"[^a-zA-Z0-9_]", "", slug) diff --git a/metadata/fairspec_metadata/actions/path/general_spec.py b/metadata/fairspec_metadata/actions/path/general_spec.py new file mode 100644 index 0000000..968e9ad --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/general_spec.py @@ -0,0 +1,167 @@ +import os + +import pytest + +from .general import ( + get_file_extension, + get_file_name, + get_file_name_slug, + get_is_remote_path, +) + + +class TestGetIsRemotePath: + @pytest.mark.parametrize( + "path, expected", + [ + ("http://example.com/path/to/file.txt", True), + ("https://example.com/path/to/file.txt", True), + ("ftp://example.com/path/to/file.txt", True), + 
("file:///path/to/file.txt", False), + ("/path/to/file.txt", False), + ("path/to/file.txt", False), + ("./file.txt", False), + ("../file.txt", False), + ("", False), + ("http:example.com", True), + ], + ids=[ + "http URL", + "https URL", + "ftp URL", + "file URL", + "absolute path", + "relative path", + "current directory path", + "parent directory path", + "empty string", + "protocol without slashes", + ], + ) + def test_get_is_remote_path(self, path: str, expected: bool): + assert get_is_remote_path(path) == expected + + +class TestGetFileName: + @pytest.mark.parametrize( + "path, expected", + [ + ("file.txt", "file.txt"), + ("some/path/to/file.txt", "file.txt"), + ( + "http://example.com/path/to/file.txt", + "file.txt", + ), + ( + "https://example.com/path/to/file.txt", + "file.txt", + ), + ( + "https://example.com/path/to/file.txt?query=param", + "file.txt", + ), + ( + "https://example.com/path/to/file.txt#section", + "file.txt", + ), + ( + "https://example.com/path/to/file.txt?query=param#section", + "file.txt", + ), + ("https://example.com/path/", None), + (f"some{os.sep}path{os.sep}", None), + ], + ids=[ + "simple filename", + "directory path with filename", + "remote HTTP URL", + "remote HTTPS URL", + "URL with query parameters", + "URL with hash", + "URL with query and hash", + "URL with no filename", + "local path with no filename", + ], + ) + def test_get_file_name(self, path: str, expected: str | None): + assert get_file_name(path) == expected + + +class TestGetFileExtension: + def test_infers_format_from_single_string_path(self): + assert get_file_extension("/data/users.csv") == "csv" + + def test_infers_format_from_url_path(self): + assert get_file_extension("https://example.com/data/products.json") == "json" + + def test_preserve_extension_case(self): + assert get_file_extension("/data/file.CSV") == "CSV" + + def test_returns_format_name_even_for_unsupported_extensions( + self, + ): + assert get_file_extension("/data/file.tar.gz") == "gz" + + def 
test_returns_none_when_path_has_no_extension(self): + assert get_file_extension("/data/file") is None + + def test_returns_none_when_filename_cannot_be_determined( + self, + ): + assert get_file_extension("/data/folder/") is None + + def test_handles_multiple_extensions(self): + assert get_file_extension("/data/file.backup.csv") == "csv" + + def test_handles_hidden_files_with_extension(self): + assert get_file_extension("/data/.gitignore") is None + + def test_handles_url_with_query_parameters(self): + assert get_file_extension("https://example.com/file.json?key=value") == "json" + + def test_handles_url_with_hash(self): + assert get_file_extension("https://example.com/file.pdf#page=1") == "pdf" + + +class TestGetFileNameSlug: + def test_returns_slugified_basename_from_single_string_path( + self, + ): + assert get_file_name_slug("/data/users.csv") == "users" + + def test_returns_slugified_basename_from_url_path(self): + assert ( + get_file_name_slug("https://example.com/data/products.json") == "products" + ) + + def test_returns_none_when_path_has_no_filename(self): + assert get_file_name_slug("/data/folder/") is None + + def test_handles_complex_filename_with_multiple_dots(self): + assert get_file_name_slug("/data/file.backup.csv") == "file_backup" + + def test_slugifies_filename_with_spaces_and_special_characters( + self, + ): + assert get_file_name_slug("/data/My Data File!.csv") == "my_data_file" + + def test_returns_none_for_empty_string(self): + assert get_file_name_slug("") is None + + def test_handles_simple_filename_without_directory(self): + assert get_file_name_slug("document.txt") == "document" + + def test_handles_url_with_query_parameters(self): + assert get_file_name_slug("https://example.com/file.json?key=value") == "file" + + def test_handles_url_with_hash(self): + assert get_file_name_slug("https://example.com/report.pdf#page=1") == "report" + + def test_handles_hidden_files(self): + assert get_file_name_slug("/data/.gitignore") == "gitignore" + + 
def test_slugifies_uppercase_letters_to_lowercase(self): + assert get_file_name_slug("/data/MyDocument.PDF") == "my_document" + + def test_replaces_hyphens_with_underscores(self): + assert get_file_name_slug("/data/my-file-name.csv") == "my_file_name" diff --git a/metadata/fairspec_metadata/actions/path/normalize.py b/metadata/fairspec_metadata/actions/path/normalize.py new file mode 100644 index 0000000..619efb6 --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/normalize.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import os +import posixpath +import urllib.parse + +from .general import get_is_remote_path, safe_relpath + + +def normalize_path(path: str, *, basepath: str | None = None) -> str: + is_path_remote = get_is_remote_path(path) + is_basepath_remote = get_is_remote_path(basepath or "") + + if is_path_remote: + parsed = urllib.parse.urlparse(path) + return urllib.parse.urlunparse( + ( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) + ) + + if is_basepath_remote: + joined = f"{basepath}/{path}" + parsed = urllib.parse.urlparse(joined) + normalized_url_path = posixpath.normpath(parsed.path) + normalized_url = urllib.parse.urlunparse( + ( + parsed.scheme, + parsed.netloc, + normalized_url_path, + parsed.params, + parsed.query, + parsed.fragment, + ) + ) + if not normalized_url.startswith(basepath or ""): + raise Error(path, basepath) + return normalized_url + + normalized_path = os.path.join(basepath, path) if basepath else path + relative_path = os.path.relpath(normalized_path, basepath or "") + if relative_path.startswith(".."): + raise Error(path, basepath) + + return safe_relpath(os.path.abspath(normalized_path)) + + +class Error(Exception): + def __init__(self, path: str, basepath: str | None): + super().__init__(f"Path {path} is not a subpath of {basepath}") diff --git a/metadata/fairspec_metadata/actions/path/normalize_spec.py 
b/metadata/fairspec_metadata/actions/path/normalize_spec.py new file mode 100644 index 0000000..018839c --- /dev/null +++ b/metadata/fairspec_metadata/actions/path/normalize_spec.py @@ -0,0 +1,76 @@ +import os + +import pytest + +from .normalize import normalize_path + + +class TestNormalizePath: + @pytest.mark.parametrize( + "path, basepath, expected", + [ + ( + "path/to/file.txt", + None, + os.path.join("path", "to", "file.txt"), + ), + ( + "file.txt", + "path/to", + os.path.join("path", "to", "file.txt"), + ), + ( + "http://example.com/path/to/file.txt", + None, + "http://example.com/path/to/file.txt", + ), + ( + "http://example.com/path/to/file.txt?query=param", + None, + "http://example.com/path/to/file.txt?query=param", + ), + ( + "path/to/file.txt", + "http://example.com", + "http://example.com/path/to/file.txt", + ), + ( + "file.txt", + "/absolute/path", + os.path.relpath("/absolute/path/file.txt"), + ), + ( + "path/to/file.txt", + "", + os.path.join("path", "to", "file.txt"), + ), + ], + ids=[ + "local path without basepath", + "local path with local basepath", + "remote path", + "remote path with query string", + "local path with remote basepath", + "local path with absolute basepath", + "path with empty basepath", + ], + ) + def test_valid(self, path: str, basepath: str | None, expected: str): + assert normalize_path(path, basepath=basepath) == expected + + @pytest.mark.parametrize( + "path, basepath", + [ + ("/absolute/path/to/file.txt", None), + ("../file.txt", "/folder"), + ("../file.txt", "http://example.com/data"), + ], + ids=[ + "absolute path", + "local traversed path", + "remote traversed path", + ], + ) + def test_throw(self, path: str, basepath: str | None): + with pytest.raises(Exception): + normalize_path(path, basepath=basepath) diff --git a/metadata/fairspec_metadata/actions/profile/assert_.py b/metadata/fairspec_metadata/actions/profile/assert_.py new file mode 100644 index 0000000..009998e --- /dev/null +++ 
b/metadata/fairspec_metadata/actions/profile/assert_.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import re + +from fairspec_metadata.models.json_schema import JsonSchema +from fairspec_metadata.models.profile import Profile, ProfileType + + +class Error(Exception): + pass + + +def assert_profile( + json_schema: JsonSchema, + *, + path: str, + type: ProfileType, +) -> Profile: + regex = re.compile( + rf"^https://fairspec\.org/profiles/(\d+\.\d+\.\d+|latest)/{re.escape(type.value)}\.json$" + ) + + paths = [path] + if isinstance(json_schema.get("allOf"), list): + for ref in json_schema["allOf"]: + if isinstance(ref, str): + paths.append(ref) + + if type == ProfileType.data_schema: + if path == "https://json-schema.org/draft/2020-12/schema": + return json_schema + + for p in paths: + if regex.search(p): + return json_schema + + raise Error(f"Profile at path {path} is not a valid {type.value} profile") diff --git a/metadata/fairspec_metadata/actions/profile/assert_spec.py b/metadata/fairspec_metadata/actions/profile/assert_spec.py new file mode 100644 index 0000000..5efde17 --- /dev/null +++ b/metadata/fairspec_metadata/actions/profile/assert_spec.py @@ -0,0 +1,89 @@ +import pytest + +from fairspec_metadata.models.profile import ProfileType + +from .assert_ import Error, assert_profile + + +class TestAssertProfileValid: + def test_valid_dataset_profile(self): + schema = {"type": "object"} + result = assert_profile( + schema, + path="https://fairspec.org/profiles/latest/dataset.json", + type=ProfileType.dataset, + ) + assert result == schema + + def test_valid_versioned_profile(self): + schema = {"type": "object"} + result = assert_profile( + schema, + path="https://fairspec.org/profiles/1.0.0/catalog.json", + type=ProfileType.catalog, + ) + assert result == schema + + def test_valid_profile_via_allof(self): + schema = { + "type": "object", + "allOf": ["https://fairspec.org/profiles/latest/table-schema.json"], + } + result = assert_profile( + schema, + 
def load_profile(path: str, *, profile_type: ProfileType) -> Profile:
    """Load the profile at ``path`` and assert it matches ``profile_type``.

    Bundled profiles are served from the in-memory registry; anything else
    is loaded as a JSON Schema from ``path``.
    """
    json_schema = next(
        (entry.profile for entry in profile_registry if entry.path == path),
        None,
    )

    if json_schema is None:
        # Not a bundled profile -- load it from the path itself.
        json_schema = load_json_schema(path)

    return assert_profile(json_schema, path=path, type=profile_type)
def create_report(
    errors: list[FairspecError] | None = None,
    *,
    max_errors: int | None = None,
) -> Report:
    """Create a validation report from collected errors.

    ``max_errors`` only truncates the errors *included* in the report.
    Validity is decided from the full error list, so e.g. ``max_errors=0``
    cannot turn a failing report into a passing one (the original computed
    ``valid`` after truncation, which had exactly that bug).
    """
    all_errors = list(errors) if errors else []
    valid = not all_errors
    return Report(valid=valid, errors=all_errors[:max_errors])
def get_data_path(resource: Resource) -> str | list[str] | None:
    """Return ``resource.data`` when it is a path (string) or list of paths."""
    value = resource.data

    if isinstance(value, str):
        return value

    if isinstance(value, list) and all(isinstance(entry, str) for entry in value):
        return value

    return None


def get_data_value(resource: Resource) -> ResourceDataValue | None:
    """Return inline data, or None when the data is path-based."""
    return resource.data if not get_data_path(resource) else None


def get_data_records(resource: Resource) -> list[dict] | None:
    """Return inline data as a list of records, or None otherwise."""
    value = get_data_value(resource)
    if isinstance(value, list) and value:
        return value
    return None


def get_data_paths(resource: Resource) -> list[str]:
    """Return the data paths as a list (possibly empty)."""
    found = get_data_path(resource)
    if isinstance(found, list):
        return found
    return [found] if found else []


def get_data_first_path(resource: Resource) -> str | None:
    """Return the first data path, or None when there is none."""
    paths = get_data_paths(resource)
    return paths[0] if paths else None
def denormalize_resource(
    resource: Resource, *, basepath: str | None = None
) -> Descriptor:
    """Return a descriptor for ``resource`` with path references denormalized.

    The input resource is not modified; a deep copy is transformed and then
    dumped with aliases applied and ``None`` fields dropped.
    """
    copy = resource.model_copy(deep=True)

    data = copy.data
    if isinstance(data, str):
        copy.data = denormalize_path(data, basepath=basepath)
    elif isinstance(data, list):
        copy.data = [
            denormalize_path(entry, basepath=basepath) if isinstance(entry, str) else entry
            for entry in data
        ]

    # Sidecar references (dialect/schemas) may also be given as path strings.
    for field in ("fileDialect", "dataSchema", "tableSchema"):
        ref = getattr(copy, field, None)
        if isinstance(ref, str):
            setattr(copy, field, denormalize_path(ref, basepath=basepath))

    return copy.model_dump(by_alias=True, exclude_none=True)
resource_number: int | None = None +) -> str: + first_path = get_data_first_path(resource) + + if first_path: + name = get_file_name_slug(first_path) + if name: + return name + + return f"resource{resource_number if resource_number is not None else ''}" diff --git a/metadata/fairspec_metadata/actions/resource/infer_spec.py b/metadata/fairspec_metadata/actions/resource/infer_spec.py new file mode 100644 index 0000000..770a005 --- /dev/null +++ b/metadata/fairspec_metadata/actions/resource/infer_spec.py @@ -0,0 +1,33 @@ +from fairspec_metadata import Resource + +from .infer import infer_resource_name + + +class TestInferResourceName: + def test_infers_name_from_single_path(self): + assert infer_resource_name(Resource(data="/data/users.csv")) == "users" + + def test_infers_name_from_first_path_in_array(self): + resource = Resource(data=["/data/users.csv", "/data/backup.csv"]) + assert infer_resource_name(resource) == "users" + + def test_infers_name_from_url(self): + resource = Resource(data="https://example.com/data/products.json") + assert infer_resource_name(resource) == "products" + + def test_returns_default_name_when_no_path(self): + assert infer_resource_name(Resource()) == "resource" + + def test_returns_default_name_when_no_filename(self): + assert infer_resource_name(Resource(data="/data/folder/")) == "resource" + + def test_handles_complex_filename(self): + resource = Resource(data="/data/file.backup.csv") + assert infer_resource_name(resource) == "file_backup" + + def test_slugifies_filename(self): + resource = Resource(data="/data/My Data File!.csv") + assert infer_resource_name(resource) == "my_data_file" + + def test_returns_numbered_default(self): + assert infer_resource_name(Resource(), resource_number=1) == "resource1" diff --git a/metadata/fairspec_metadata/actions/resource/normalize.py b/metadata/fairspec_metadata/actions/resource/normalize.py new file mode 100644 index 0000000..546ef9d --- /dev/null +++ 
def normalize_resource(
    resource: Resource, *, basepath: str | None = None
) -> Descriptor:
    """Return a descriptor for ``resource`` with path references normalized.

    The input resource is not modified; a deep copy is transformed and then
    dumped with aliases applied and ``None`` fields dropped.
    """
    copy = resource.model_copy(deep=True)

    data = copy.data
    if isinstance(data, str):
        copy.data = normalize_path(data, basepath=basepath)
    elif isinstance(data, list):
        copy.data = [
            normalize_path(entry, basepath=basepath) if isinstance(entry, str) else entry
            for entry in data
        ]

    # Sidecar references (dialect/schemas) may also be given as path strings.
    for field in ("fileDialect", "dataSchema", "tableSchema"):
        ref = getattr(copy, field, None)
        if isinstance(ref, str):
            setattr(copy, field, normalize_path(ref, basepath=basepath))

    return copy.model_dump(by_alias=True, exclude_none=True)
def get_columns(table_schema: Descriptor) -> list[Column]:
    """Build Column objects from a table schema's ``properties`` map.

    ``required`` is set to a truthy value when the schema marks all columns
    required (``allRequired``) or lists the column name under ``required``;
    otherwise it is left as None (unset).
    """
    properties = table_schema.get("properties") or {}
    required_names = table_schema.get("required") or []
    all_required = table_schema.get("allRequired")

    columns: list[Column] = []
    for column_name, column_property in properties.items():
        column = create_column_from_property(column_name, column_property)
        column.required = all_required or column_name in required_names or None
        columns.append(column)

    return columns
def resolve_table_schema(
    table_schema: TableSchema | str | None = None,
) -> TableSchema | None:
    """Resolve a table schema reference.

    Strings are treated as paths and loaded; schema objects and None are
    passed through unchanged.
    """
    if isinstance(table_schema, str):
        return load_table_schema(table_schema)
    return table_schema
def save_table_schema(
    table_schema: TableSchema,
    *,
    path: str,
    overwrite: bool = False,
) -> None:
    """Serialize ``table_schema`` to ``path`` as a descriptor.

    Stamps the fairspec profile URL for the current version as ``$schema``
    unless the schema already declares one.
    """
    descriptor = table_schema.model_dump(by_alias=True, exclude_none=True)

    descriptor.setdefault(
        "$schema",
        f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/table-schema.json",
    )

    save_descriptor(descriptor, path=path, overwrite=overwrite)
TableSchema(**descriptor) + + return TableSchemaValidationResult( + valid=report.valid, + errors=report.errors, + table_schema=table_schema, + ) diff --git a/metadata/fairspec_metadata/actions/table_schema/validate_spec.py b/metadata/fairspec_metadata/actions/table_schema/validate_spec.py new file mode 100644 index 0000000..7fc3397 --- /dev/null +++ b/metadata/fairspec_metadata/actions/table_schema/validate_spec.py @@ -0,0 +1,45 @@ +import pytest + +from .validate import validate_table_schema + + +class TestValidateTableSchema: + def test_valid_schema(self): + descriptor = { + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"}, + }, + } + result = validate_table_schema(descriptor) + assert result.valid is True + assert result.errors == [] + + def test_invalid_schema(self): + descriptor = { + "properties": { + "id": {"type": 123}, + }, + } + result = validate_table_schema(descriptor) + assert result.valid is False + assert len(result.errors) > 0 + + def test_missing_schema_is_valid(self): + descriptor = { + "properties": { + "id": {"type": "integer"}, + }, + } + result = validate_table_schema(descriptor) + assert result.valid is True + + def test_wrong_profile_type_raises(self): + descriptor = { + "$schema": "https://fairspec.org/profiles/latest/dataset.json", + "properties": { + "id": {"type": "integer"}, + }, + } + with pytest.raises(Exception, match="table-schema"): + validate_table_schema(descriptor) diff --git a/metadata/fairspec_metadata/models/base.py b/metadata/fairspec_metadata/models/base.py new file mode 100644 index 0000000..5457591 --- /dev/null +++ b/metadata/fairspec_metadata/models/base.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, ConfigDict + + +class FairspecModel(BaseModel): + model_config = ConfigDict(revalidate_instances="never") diff --git a/metadata/fairspec_metadata/models/catalog.py b/metadata/fairspec_metadata/models/catalog.py new file mode 100644 index 0000000..5954e9f --- /dev/null +++ 
b/metadata/fairspec_metadata/models/catalog.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from .base import FairspecModel + + +class CatalogDataset(FairspecModel): + loc: str = Field(description="The location (URI) of the dataset") + upd: str = Field(description="The last updated date-time of the dataset") + + +Catalog = Annotated[ + list[CatalogDataset], + Field( + description="A catalog is an array of dataset references with their locations and update times" + ), +] diff --git a/metadata/fairspec_metadata/models/column/array.py b/metadata/fairspec_metadata/models/column/array.py new file mode 100644 index 0000000..62435bb --- /dev/null +++ b/metadata/fairspec_metadata/models/column/array.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +ArrayNullablePropertyType = ( + Literal["array"] + | tuple[Literal["array"], Literal["null"]] + | tuple[Literal["null"], Literal["array"]] +) + + +class ArrayMissingValueItem(FairspecModel): + value: str + label: str + + +class ArrayColumnProperty(BaseColumnProperty): + type: ArrayNullablePropertyType = "array" + format: Literal[None] = None + enum: list[Any] | None = Field( + default=None, + description="An optional array of allowed values for the column", + ) + const: list[Any] | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[list[Any]] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[list[Any]] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | ArrayMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + 
allOf: Any | None = None + anyOf: Any | None = None + oneOf: Any | None = None + not_: Any | None = Field(default=None, alias="not") + if_: Any | None = Field(default=None, alias="if") + then: Any | None = None + else_: Any | None = Field(default=None, alias="else") + items: Any | None = None + prefixItems: Any | None = None + additionalItems: Any | None = None + contains: Any | None = None + minContains: float | None = None + maxContains: float | None = None + maxItems: float | None = None + minItems: float | None = None + uniqueItems: bool | None = None + + +class ArrayColumn(BaseColumn): + type: Literal["array"] + property: ArrayColumnProperty diff --git a/metadata/fairspec_metadata/models/column/base.py b/metadata/fairspec_metadata/models/column/base.py new file mode 100644 index 0000000..aef4547 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/base.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Any + +from pydantic import Field + +from ..base import FairspecModel + + +class BasePropertyType(StrEnum): + string = "string" + number = "number" + integer = "integer" + boolean = "boolean" + array = "array" + object = "object" + + +class BaseColumnProperty(FairspecModel): + title: str | None = Field( + default=None, + description="An optional human-readable title for the column", + ) + description: str | None = Field( + default=None, + description="An optional detailed description of the column", + ) + rdfType: str | None = Field( + default=None, + description="An optional URI for semantic type (RDF)", + ) + default: Any | None = None + + +class BaseColumn(FairspecModel): + name: str + type: str + required: bool | None = None + nullable: bool | None = None + property: BaseColumnProperty diff --git a/metadata/fairspec_metadata/models/column/base64.py b/metadata/fairspec_metadata/models/column/base64.py new file mode 100644 index 0000000..eda492f --- /dev/null +++ 
b/metadata/fairspec_metadata/models/column/base64.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class Base64ColumnProperty(BaseStringColumnProperty): + format: Literal["base64"] = "base64" + + +class Base64Column(BaseColumn): + type: Literal["base64"] + property: Base64ColumnProperty diff --git a/metadata/fairspec_metadata/models/column/boolean.py b/metadata/fairspec_metadata/models/column/boolean.py new file mode 100644 index 0000000..dbc6da4 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/boolean.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +BooleanNullablePropertyType = ( + Literal["boolean"] + | tuple[Literal["boolean"], Literal["null"]] + | tuple[Literal["null"], Literal["boolean"]] +) + + +class StringIntMissingValue(FairspecModel): + value: str | int + label: str + + +class BooleanColumnProperty(BaseColumnProperty): + type: BooleanNullablePropertyType = "boolean" + format: Literal[None] = None + enum: list[bool] | None = Field( + default=None, + description="An optional array of allowed values for the column", + ) + const: bool | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[bool] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[bool] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | int | StringIntMissingValue] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + trueValues: list[str] | None = Field( + default=None, + description="An optional array of string values that should be 
interpreted as true when parsing data", + ) + falseValues: list[str] | None = Field( + default=None, + description="An optional array of string values that should be interpreted as false when parsing data", + ) + + +class BooleanColumn(BaseColumn): + type: Literal["boolean"] + property: BooleanColumnProperty diff --git a/metadata/fairspec_metadata/models/column/categorical.py b/metadata/fairspec_metadata/models/column/categorical.py new file mode 100644 index 0000000..59b824c --- /dev/null +++ b/metadata/fairspec_metadata/models/column/categorical.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from typing import Literal, Union + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn +from .integer import BaseIntegerColumnProperty +from .string import BaseStringColumnProperty + + +class IntegerCategoryItem(FairspecModel): + value: int + label: str + + +class IntegerCategoricalColumnProperty(BaseIntegerColumnProperty): + format: Literal["categorical"] = "categorical" + categories: list[int | IntegerCategoryItem] | None = Field( + default=None, + description="An optional array of categorical values with optional labels", + ) + withOrder: bool | None = Field( + default=None, + description="An optional boolean indicating whether the categories are ordered", + ) + + +class StringCategoryItem(FairspecModel): + value: str + label: str + + +class StringCategoricalColumnProperty(BaseStringColumnProperty): + format: Literal["categorical"] = "categorical" + categories: list[str | StringCategoryItem] | None = Field( + default=None, + description="An optional array of categorical values with optional labels", + ) + withOrder: bool | None = Field( + default=None, + description="An optional boolean indicating whether the categories are ordered", + ) + + +class CategoricalColumn(BaseColumn): + type: Literal["categorical"] + property: Union[ + StringCategoricalColumnProperty, + IntegerCategoricalColumnProperty, + ] diff --git 
a/metadata/fairspec_metadata/models/column/column.py b/metadata/fairspec_metadata/models/column/column.py new file mode 100644 index 0000000..efb77c4 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/column.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Annotated, Union + +from pydantic import Field + +from .array import ArrayColumn, ArrayColumnProperty +from .base64 import Base64Column, Base64ColumnProperty +from .boolean import BooleanColumn, BooleanColumnProperty +from .categorical import ( + CategoricalColumn, + IntegerCategoricalColumnProperty, + StringCategoricalColumnProperty, +) +from .date import DateColumn, DateColumnProperty +from .date_time import DateTimeColumn, DateTimeColumnProperty +from .decimal import DecimalColumn, DecimalColumnProperty +from .duration import DurationColumn, DurationColumnProperty +from .email import EmailColumn, EmailColumnProperty +from .geojson import GeojsonColumn, GeojsonColumnProperty +from .hex import HexColumn, HexColumnProperty +from .integer import IntegerColumn, IntegerColumnProperty +from .list import ListColumn, ListColumnProperty +from .number import NumberColumn, NumberColumnProperty +from .object import ObjectColumn, ObjectColumnProperty +from .string import StringColumn, StringColumnProperty +from .time import TimeColumn, TimeColumnProperty +from .topojson import TopojsonColumn, TopojsonColumnProperty +from .unknown import UnknownColumn, UnknownColumnProperty +from .url import UrlColumn, UrlColumnProperty +from .wkb import WkbColumn, WkbColumnProperty +from .wkt import WktColumn, WktColumnProperty + +Column = Annotated[ + Union[ + ArrayColumn, + Base64Column, + BooleanColumn, + CategoricalColumn, + DateColumn, + DateTimeColumn, + DecimalColumn, + DurationColumn, + EmailColumn, + GeojsonColumn, + HexColumn, + IntegerColumn, + ListColumn, + NumberColumn, + ObjectColumn, + StringColumn, + TimeColumn, + TopojsonColumn, + UnknownColumn, + UrlColumn, 
+ WkbColumn, + WktColumn, + ], + Field(discriminator="type"), +] + + +class ColumnType(StrEnum): + array = "array" + base64 = "base64" + boolean = "boolean" + categorical = "categorical" + date = "date" + date_time = "date-time" + decimal = "decimal" + duration = "duration" + email = "email" + geojson = "geojson" + hex = "hex" + integer = "integer" + list = "list" + number = "number" + object = "object" + string = "string" + time = "time" + topojson = "topojson" + unknown = "unknown" + url = "url" + wkb = "wkb" + wkt = "wkt" + + +StringColumnPropertyWithFormat = Annotated[ + Union[ + ListColumnProperty, + Base64ColumnProperty, + HexColumnProperty, + EmailColumnProperty, + UrlColumnProperty, + DateTimeColumnProperty, + DateColumnProperty, + TimeColumnProperty, + DurationColumnProperty, + WktColumnProperty, + WkbColumnProperty, + StringCategoricalColumnProperty, + DecimalColumnProperty, + ], + Field(discriminator="format"), +] + +ObjectColumnPropertyWithFormat = Annotated[ + Union[ + GeojsonColumnProperty, + TopojsonColumnProperty, + ], + Field(discriminator="format"), +] + +ColumnProperty = Union[ + BooleanColumnProperty, + IntegerColumnProperty, + IntegerCategoricalColumnProperty, + NumberColumnProperty, + StringColumnPropertyWithFormat, + StringColumnProperty, + ArrayColumnProperty, + ObjectColumnPropertyWithFormat, + ObjectColumnProperty, + UnknownColumnProperty, +] diff --git a/metadata/fairspec_metadata/models/column/date.py b/metadata/fairspec_metadata/models/column/date.py new file mode 100644 index 0000000..e14be47 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/date.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class DateColumnProperty(BaseStringColumnProperty): + format: Literal["date"] = "date" + temporalFormat: str | None = Field( + default=None, + description="An optional string specifying 
the datetime format pattern as per the Strftime specification", + ) + + +class DateColumn(BaseColumn): + type: Literal["date"] + property: DateColumnProperty diff --git a/metadata/fairspec_metadata/models/column/date_time.py b/metadata/fairspec_metadata/models/column/date_time.py new file mode 100644 index 0000000..be3b9f5 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/date_time.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class DateTimeColumnProperty(BaseStringColumnProperty): + format: Literal["date-time"] = "date-time" + temporalFormat: str | None = Field( + default=None, + description="An optional string specifying the datetime format pattern as per the Strftime specification", + ) + + +class DateTimeColumn(BaseColumn): + type: Literal["date-time"] + property: DateTimeColumnProperty diff --git a/metadata/fairspec_metadata/models/column/decimal.py b/metadata/fairspec_metadata/models/column/decimal.py new file mode 100644 index 0000000..bf44c54 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/decimal.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class DecimalColumnProperty(BaseStringColumnProperty): + format: Literal["decimal"] = "decimal" + minimum: float | None = Field( + default=None, + description="An optional minimum value constraint (inclusive)", + ) + maximum: float | None = Field( + default=None, + description="An optional maximum value constraint (inclusive)", + ) + exclusiveMinimum: float | None = Field( + default=None, + description="An optional minimum value constraint (exclusive)", + ) + exclusiveMaximum: float | None = Field( + default=None, + description="An optional maximum value constraint (exclusive)", + ) + 
multipleOf: float | None = Field( + default=None, + gt=0, + description="An optional constraint that values must be a multiple of this number", + ) + decimalChar: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used as the decimal separator in the data", + ) + groupChar: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used as the thousands separator in the data", + ) + withText: bool | None = Field( + default=None, + description="An optional boolean indicating whether numeric values may include non-numeric text that should be stripped during parsing", + ) + + +class DecimalColumn(BaseColumn): + type: Literal["decimal"] + property: DecimalColumnProperty diff --git a/metadata/fairspec_metadata/models/column/duration.py b/metadata/fairspec_metadata/models/column/duration.py new file mode 100644 index 0000000..922e5e7 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/duration.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class DurationColumnProperty(BaseStringColumnProperty): + format: Literal["duration"] = "duration" + + +class DurationColumn(BaseColumn): + type: Literal["duration"] + property: DurationColumnProperty diff --git a/metadata/fairspec_metadata/models/column/email.py b/metadata/fairspec_metadata/models/column/email.py new file mode 100644 index 0000000..7d4f1f7 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/email.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class EmailColumnProperty(BaseStringColumnProperty): + format: Literal["email"] = "email" + + +class EmailColumn(BaseColumn): + type: Literal["email"] + property: EmailColumnProperty diff --git 
a/metadata/fairspec_metadata/models/column/geojson.py b/metadata/fairspec_metadata/models/column/geojson.py new file mode 100644 index 0000000..e3ea398 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/geojson.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .object import BaseObjectColumnProperty + + +class GeojsonColumnProperty(BaseObjectColumnProperty): + format: Literal["geojson"] = "geojson" + + +class GeojsonColumn(BaseColumn): + type: Literal["geojson"] + property: GeojsonColumnProperty diff --git a/metadata/fairspec_metadata/models/column/hex.py b/metadata/fairspec_metadata/models/column/hex.py new file mode 100644 index 0000000..a34b2fb --- /dev/null +++ b/metadata/fairspec_metadata/models/column/hex.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class HexColumnProperty(BaseStringColumnProperty): + format: Literal["hex"] = "hex" + + +class HexColumn(BaseColumn): + type: Literal["hex"] + property: HexColumnProperty diff --git a/metadata/fairspec_metadata/models/column/integer.py b/metadata/fairspec_metadata/models/column/integer.py new file mode 100644 index 0000000..3506c2f --- /dev/null +++ b/metadata/fairspec_metadata/models/column/integer.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +IntegerNullablePropertyType = ( + Literal["integer"] + | tuple[Literal["integer"], Literal["null"]] + | tuple[Literal["null"], Literal["integer"]] +) + + +class IntegerMissingValueItem(FairspecModel): + value: str | int + label: str + + +class BaseIntegerColumnProperty(BaseColumnProperty): + type: IntegerNullablePropertyType = "integer" + enum: list[int] | None = Field( + default=None, + description="An optional 
array of allowed values for the column", + ) + const: int | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[int] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[int] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | int | IntegerMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + minimum: int | None = Field( + default=None, + description="An optional minimum value constraint (inclusive)", + ) + maximum: int | None = Field( + default=None, + description="An optional maximum value constraint (inclusive)", + ) + exclusiveMinimum: int | None = Field( + default=None, + description="An optional minimum value constraint (exclusive)", + ) + exclusiveMaximum: int | None = Field( + default=None, + description="An optional maximum value constraint (exclusive)", + ) + multipleOf: int | None = Field( + default=None, + ge=1, + description="An optional constraint that values must be a multiple of this number", + ) + groupChar: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used as the thousands separator in the data", + ) + withText: bool | None = Field( + default=None, + description="An optional boolean indicating whether numeric values may include non-numeric text that should be stripped during parsing", + ) + + +class IntegerColumnProperty(BaseIntegerColumnProperty): + format: Literal[None] = None + + +class IntegerColumn(BaseColumn): + type: Literal["integer"] + property: IntegerColumnProperty diff --git a/metadata/fairspec_metadata/models/column/list.py b/metadata/fairspec_metadata/models/column/list.py new file mode 100644 index 0000000..5dff6d2 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/list.py 
@@ -0,0 +1,48 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Literal + +from pydantic import Field + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class ListItemType(StrEnum): + string = "string" + integer = "integer" + number = "number" + boolean = "boolean" + date_time = "date-time" + date = "date" + time = "time" + + +class ListColumnProperty(BaseStringColumnProperty): + format: Literal["list"] = "list" + itemType: ListItemType | None = Field( + default=None, + description="An optional type for items in a list column", + ) + delimiter: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used to delimit items in a list column", + ) + minItems: int | None = Field( + default=None, + ge=0, + description="An optional minimum length constraint for list values", + ) + maxItems: int | None = Field( + default=None, + ge=0, + description="An optional maximum length constraint for list values", + ) + + +class ListColumn(BaseColumn): + type: Literal["list"] + property: ListColumnProperty diff --git a/metadata/fairspec_metadata/models/column/number.py b/metadata/fairspec_metadata/models/column/number.py new file mode 100644 index 0000000..aebe70a --- /dev/null +++ b/metadata/fairspec_metadata/models/column/number.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +NumberNullablePropertyType = ( + Literal["number"] + | tuple[Literal["number"], Literal["null"]] + | tuple[Literal["null"], Literal["number"]] +) + + +class NumberMissingValueItem(FairspecModel): + value: str | int + label: str + + +class BaseNumberColumnProperty(BaseColumnProperty): + type: NumberNullablePropertyType = "number" + format: Literal[""] = "" + enum: list[float] | None = Field( + default=None, + description="An optional 
array of allowed values for the column", + ) + const: float | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[float] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[float] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | int | NumberMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + minimum: float | None = Field( + default=None, + description="An optional minimum value constraint (inclusive)", + ) + maximum: float | None = Field( + default=None, + description="An optional maximum value constraint (inclusive)", + ) + exclusiveMinimum: float | None = Field( + default=None, + description="An optional minimum value constraint (exclusive)", + ) + exclusiveMaximum: float | None = Field( + default=None, + description="An optional maximum value constraint (exclusive)", + ) + multipleOf: float | None = Field( + default=None, + gt=0, + description="An optional constraint that values must be a multiple of this number", + ) + decimalChar: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used as the decimal separator in the data", + ) + groupChar: str | None = Field( + default=None, + min_length=1, + max_length=1, + description="An optional single character used as the thousands separator in the data", + ) + withText: bool | None = Field( + default=None, + description="An optional boolean indicating whether numeric values may include non-numeric text that should be stripped during parsing", + ) + + +class NumberColumnProperty(BaseNumberColumnProperty): + format: Literal[None] = None + + +class NumberColumn(BaseColumn): + type: Literal["number"] + property: NumberColumnProperty diff --git 
a/metadata/fairspec_metadata/models/column/object.py b/metadata/fairspec_metadata/models/column/object.py new file mode 100644 index 0000000..21058c2 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/object.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +ObjectNullablePropertyType = ( + Literal["object"] + | tuple[Literal["object"], Literal["null"]] + | tuple[Literal["null"], Literal["object"]] +) + + +class ObjectMissingValueItem(FairspecModel): + value: str + label: str + + +class BaseObjectColumnProperty(BaseColumnProperty): + type: ObjectNullablePropertyType = "object" + enum: list[dict[str, Any]] | None = Field( + default=None, + description="An optional array of allowed values for the column", + ) + const: dict[str, Any] | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[dict[str, Any]] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[dict[str, Any]] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | ObjectMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + allOf: Any | None = None + anyOf: Any | None = None + oneOf: Any | None = None + not_: Any | None = Field(default=None, alias="not") + if_: Any | None = Field(default=None, alias="if") + then: Any | None = None + else_: Any | None = Field(default=None, alias="else") + properties: Any | None = None + additionalProperties: Any | None = None + patternProperties: Any | None = None + propertyNames: Any | None = None + minProperties: float | None = None + maxProperties: float | None = None + dependencies: Any | None = None + dependentRequired: Any 
| None = None + dependentSchemas: Any | None = None + required: Any | None = None + + +class ObjectColumnProperty(BaseObjectColumnProperty): + format: Literal[None] = None + + +class ObjectColumn(BaseColumn): + type: Literal["object"] + property: ObjectColumnProperty diff --git a/metadata/fairspec_metadata/models/column/string.py b/metadata/fairspec_metadata/models/column/string.py new file mode 100644 index 0000000..8632492 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/string.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from ..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + +StringNullablePropertyType = ( + Literal["string"] + | tuple[Literal["string"], Literal["null"]] + | tuple[Literal["null"], Literal["string"]] +) + + +class StringMissingValueItem(FairspecModel): + value: str + label: str + + +class BaseStringColumnProperty(BaseColumnProperty): + type: StringNullablePropertyType = "string" + enum: list[str] | None = Field( + default=None, + description="An optional array of allowed values for the column", + ) + const: str | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[str] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[str] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | StringMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + minLength: int | None = Field( + default=None, + ge=0, + description="An optional minimum length constraint for string values", + ) + maxLength: int | None = Field( + default=None, + ge=0, + description="An optional maximum length constraint for string values", + ) + pattern: str | None = Field( + default=None, + 
description="An optional regular expression pattern that values must match", + ) + + +class StringColumnProperty(BaseStringColumnProperty): + format: Literal[None] = None + + +class StringColumn(BaseColumn): + type: Literal["string"] + property: StringColumnProperty diff --git a/metadata/fairspec_metadata/models/column/time.py b/metadata/fairspec_metadata/models/column/time.py new file mode 100644 index 0000000..d458217 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/time.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class TimeColumnProperty(BaseStringColumnProperty): + format: Literal["time"] = "time" + temporalFormat: str | None = Field( + default=None, + description="An optional string specifying the datetime format pattern as per the Strftime specification", + ) + + +class TimeColumn(BaseColumn): + type: Literal["time"] + property: TimeColumnProperty diff --git a/metadata/fairspec_metadata/models/column/topojson.py b/metadata/fairspec_metadata/models/column/topojson.py new file mode 100644 index 0000000..4824cd3 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/topojson.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .object import BaseObjectColumnProperty + + +class TopojsonColumnProperty(BaseObjectColumnProperty): + format: Literal["topojson"] = "topojson" + + +class TopojsonColumn(BaseColumn): + type: Literal["topojson"] + property: TopojsonColumnProperty diff --git a/metadata/fairspec_metadata/models/column/unknown.py b/metadata/fairspec_metadata/models/column/unknown.py new file mode 100644 index 0000000..5ddb664 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/unknown.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import Field + +from 
..base import FairspecModel +from .base import BaseColumn, BaseColumnProperty + + +class UnknownMissingValueItem(FairspecModel): + value: str + label: str + + +class UnknownColumnProperty(BaseColumnProperty): + type: Literal["null"] | None = None + format: Literal[None] = None + enum: list[list[Any]] | None = Field( + default=None, + description="An optional array of allowed values for the column", + ) + const: list[Any] | None = Field( + default=None, + description="An optional const that all values must match", + ) + default: list[Any] | None = Field( + default=None, + description="An optional default value for the column", + ) + examples: list[list[Any]] | None = Field( + default=None, + description="An optional array of examples for the column", + ) + missingValues: list[str | UnknownMissingValueItem] | None = Field( + default=None, + description="An optional column-specific list of values that represent missing or null data", + ) + + +class UnknownColumn(BaseColumn): + type: Literal["unknown"] + property: UnknownColumnProperty diff --git a/metadata/fairspec_metadata/models/column/url.py b/metadata/fairspec_metadata/models/column/url.py new file mode 100644 index 0000000..01c1875 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/url.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class UrlColumnProperty(BaseStringColumnProperty): + format: Literal["url"] = "url" + + +class UrlColumn(BaseColumn): + type: Literal["url"] + property: UrlColumnProperty diff --git a/metadata/fairspec_metadata/models/column/wkb.py b/metadata/fairspec_metadata/models/column/wkb.py new file mode 100644 index 0000000..9715045 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/wkb.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + 
+class WkbColumnProperty(BaseStringColumnProperty): + format: Literal["wkb"] = "wkb" + + +class WkbColumn(BaseColumn): + type: Literal["wkb"] + property: WkbColumnProperty diff --git a/metadata/fairspec_metadata/models/column/wkt.py b/metadata/fairspec_metadata/models/column/wkt.py new file mode 100644 index 0000000..02bf1c0 --- /dev/null +++ b/metadata/fairspec_metadata/models/column/wkt.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseColumn +from .string import BaseStringColumnProperty + + +class WktColumnProperty(BaseStringColumnProperty): + format: Literal["wkt"] = "wkt" + + +class WktColumn(BaseColumn): + type: Literal["wkt"] + property: WktColumnProperty diff --git a/metadata/fairspec_metadata/models/data.py b/metadata/fairspec_metadata/models/data.py new file mode 100644 index 0000000..f1766c8 --- /dev/null +++ b/metadata/fairspec_metadata/models/data.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Any, Union + +from .path import Path + +ResourceDataPath = Union[Path, list[Path]] + +ResourceDataValue = Union[ + dict[str, Any], + list[dict[str, Any]], +] + +ResourceData = Union[ResourceDataPath, ResourceDataValue] + +Data = Any diff --git a/metadata/fairspec_metadata/models/data_schema.py b/metadata/fairspec_metadata/models/data_schema.py new file mode 100644 index 0000000..ec52f5d --- /dev/null +++ b/metadata/fairspec_metadata/models/data_schema.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import FairspecModel + +from .json_schema import JsonSchema + +DataSchema = JsonSchema + + +class RenderDataSchemaOptions(FairspecModel): + format: str diff --git a/metadata/fairspec_metadata/models/datacite/alternate_identifier.py b/metadata/fairspec_metadata/models/datacite/alternate_identifier.py new file mode 100644 index 0000000..9c65be9 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/alternate_identifier.py @@ -0,0 +1,24 @@ +from 
__future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + + +class AlternateIdentifier(FairspecModel): + alternateIdentifier: str = Field( + description="An identifier or identifiers other than the primary Identifier applied to the resource being registered" + ) + alternateIdentifierType: str = Field( + description="The type of the AlternateIdentifier (e.g., URL, URN, ISBN, ISSN, etc.)" + ) + + +AlternateIdentifiers = Annotated[ + list[AlternateIdentifier], + Field( + description="An identifier or identifiers other than the primary Identifier applied to the resource being registered" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/common.py b/metadata/fairspec_metadata/models/datacite/common.py new file mode 100644 index 0000000..7497ec7 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/common.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Annotated + +from pydantic import Field + + +class CreatorNameType(StrEnum): + Organizational = "Organizational" + Personal = "Personal" + + +class TitleType(StrEnum): + AlternativeTitle = "AlternativeTitle" + Subtitle = "Subtitle" + TranslatedTitle = "TranslatedTitle" + Other = "Other" + + +class ContributorType(StrEnum): + ContactPerson = "ContactPerson" + DataCollector = "DataCollector" + DataCurator = "DataCurator" + DataManager = "DataManager" + Distributor = "Distributor" + Editor = "Editor" + HostingInstitution = "HostingInstitution" + Producer = "Producer" + ProjectLeader = "ProjectLeader" + ProjectManager = "ProjectManager" + ProjectMember = "ProjectMember" + RegistrationAgency = "RegistrationAgency" + RegistrationAuthority = "RegistrationAuthority" + RelatedPerson = "RelatedPerson" + Researcher = "Researcher" + ResearchGroup = "ResearchGroup" + RightsHolder = "RightsHolder" + Sponsor = "Sponsor" + Supervisor = "Supervisor" + Translator = "Translator" + 
WorkPackageLeader = "WorkPackageLeader" + Other = "Other" + + +class DateType(StrEnum): + Accepted = "Accepted" + Available = "Available" + Copyrighted = "Copyrighted" + Collected = "Collected" + Coverage = "Coverage" + Created = "Created" + Issued = "Issued" + Submitted = "Submitted" + Updated = "Updated" + Valid = "Valid" + Withdrawn = "Withdrawn" + Other = "Other" + + +class ContentTypeGeneral(StrEnum): + Audiovisual = "Audiovisual" + Award = "Award" + Book = "Book" + BookChapter = "BookChapter" + Collection = "Collection" + ComputationalNotebook = "ComputationalNotebook" + ConferencePaper = "ConferencePaper" + ConferenceProceeding = "ConferenceProceeding" + DataPaper = "DataPaper" + Dataset = "Dataset" + Dissertation = "Dissertation" + Event = "Event" + Image = "Image" + Instrument = "Instrument" + InteractiveResource = "InteractiveResource" + Journal = "Journal" + JournalArticle = "JournalArticle" + Model = "Model" + OutputManagementPlan = "OutputManagementPlan" + PeerReview = "PeerReview" + PhysicalObject = "PhysicalObject" + Preprint = "Preprint" + Project = "Project" + Report = "Report" + Service = "Service" + Software = "Software" + Sound = "Sound" + Standard = "Standard" + StudyRegistration = "StudyRegistration" + Text = "Text" + Workflow = "Workflow" + Other = "Other" + + +class DescriptionType(StrEnum): + Abstract = "Abstract" + Methods = "Methods" + SeriesInformation = "SeriesInformation" + TableOfContents = "TableOfContents" + TechnicalInfo = "TechnicalInfo" + Other = "Other" + + +class RelationType(StrEnum): + IsCitedBy = "IsCitedBy" + Cites = "Cites" + IsCollectedBy = "IsCollectedBy" + Collects = "Collects" + IsSupplementTo = "IsSupplementTo" + IsSupplementedBy = "IsSupplementedBy" + IsContinuedBy = "IsContinuedBy" + Continues = "Continues" + IsDescribedBy = "IsDescribedBy" + Describes = "Describes" + HasMetadata = "HasMetadata" + IsMetadataFor = "IsMetadataFor" + HasVersion = "HasVersion" + IsVersionOf = "IsVersionOf" + IsNewVersionOf = 
"IsNewVersionOf" + IsPartOf = "IsPartOf" + IsPreviousVersionOf = "IsPreviousVersionOf" + IsPublishedIn = "IsPublishedIn" + HasPart = "HasPart" + IsReferencedBy = "IsReferencedBy" + References = "References" + IsDocumentedBy = "IsDocumentedBy" + Documents = "Documents" + IsCompiledBy = "IsCompiledBy" + Compiles = "Compiles" + IsVariantFormOf = "IsVariantFormOf" + IsOriginalFormOf = "IsOriginalFormOf" + IsIdenticalTo = "IsIdenticalTo" + IsReviewedBy = "IsReviewedBy" + Reviews = "Reviews" + IsDerivedFrom = "IsDerivedFrom" + IsSourceOf = "IsSourceOf" + IsRequiredBy = "IsRequiredBy" + Requires = "Requires" + IsObsoletedBy = "IsObsoletedBy" + Obsoletes = "Obsoletes" + HasTranslation = "HasTranslation" + IsTranslationOf = "IsTranslationOf" + + +class RelatedIdentifierType(StrEnum): + ARK = "ARK" + arXiv = "arXiv" + bibcode = "bibcode" + CSTR = "CSTR" + DOI = "DOI" + EAN13 = "EAN13" + EISSN = "EISSN" + Handle = "Handle" + IGSN = "IGSN" + ISBN = "ISBN" + ISSN = "ISSN" + ISTC = "ISTC" + LISSN = "LISSN" + LSID = "LSID" + PMID = "PMID" + PURL = "PURL" + RRID = "RRID" + UPC = "UPC" + URL = "URL" + URN = "URN" + w3id = "w3id" + + +class FunderIdentifierType(StrEnum): + ISNI = "ISNI" + GRID = "GRID" + CrossrefFunderID = "Crossref Funder ID" + ROR = "ROR" + Other = "Other" + + +class NumberType(StrEnum): + Article = "Article" + Chapter = "Chapter" + Report = "Report" + Other = "Other" + + +Longitude = Annotated[float, Field(ge=-180, le=180)] + +Latitude = Annotated[float, Field(ge=-90, le=90)] diff --git a/metadata/fairspec_metadata/models/datacite/content_type.py b/metadata/fairspec_metadata/models/datacite/content_type.py new file mode 100644 index 0000000..1bdfd69 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/content_type.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from pydantic import Field + +from ..base import FairspecModel + +from .common import ContentTypeGeneral + + +class ContentTypes(FairspecModel): + resourceType: str | None = Field( + 
default=None, + description="A description of the resource (free text)", + ) + resourceTypeGeneral: ContentTypeGeneral = Field( + description="The general type of the resource (e.g., Dataset, Software, Collection, Image, etc.)" + ) diff --git a/metadata/fairspec_metadata/models/datacite/contributor.py b/metadata/fairspec_metadata/models/datacite/contributor.py new file mode 100644 index 0000000..b533fdc --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/contributor.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from .common import ContributorType +from .creator import Creator + + +class Contributor(Creator): + contributorType: ContributorType = Field( + description="The type of contributor (e.g., ContactPerson, DataCollector, Editor, etc.)" + ) + + +Contributors = Annotated[ + list[Contributor], + Field( + description="The institution or person responsible for collecting, managing, distributing, or otherwise contributing to the development of the resource" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/creator.py b/metadata/fairspec_metadata/models/datacite/creator.py new file mode 100644 index 0000000..45ed5e6 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/creator.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import CreatorNameType + + +class CreatorNameIdentifier(FairspecModel): + nameIdentifier: str = Field( + description="Uniquely identifies an individual or legal entity, according to various schemas" + ) + nameIdentifierScheme: str = Field( + description="The name of the name identifier scheme (e.g., ORCID, ISNI, ROR)" + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the name identifier scheme", + ) + + +class CreatorAffiliation(FairspecModel): + name: str = Field( + description="The 
organizational or institutional affiliation of the creator" + ) + affiliationIdentifier: str | None = Field( + default=None, + description="Uniquely identifies the organizational affiliation of the creator", + ) + affiliationIdentifierScheme: str | None = Field( + default=None, + description="The name of the affiliation identifier scheme", + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the affiliation identifier scheme", + ) + + +class Creator(FairspecModel): + name: str = Field( + description="The main researchers involved in producing the data, or the authors of the publication in priority order" + ) + nameType: CreatorNameType | None = Field( + default=None, + description="The type of name (Organizational or Personal)", + ) + givenName: str | None = Field( + default=None, + description="The personal or first name of the creator", + ) + familyName: str | None = Field( + default=None, + description="The surname or last name of the creator", + ) + nameIdentifiers: list[CreatorNameIdentifier] | None = Field( + default=None, + description="Uniquely identifies an individual or legal entity, according to various schemas", + ) + affiliation: list[CreatorAffiliation] | None = Field( + default=None, + description="The organizational or institutional affiliation of the creator", + ) + lang: str | None = Field( + default=None, + description="Language of the name, specified using ISO 639-1 or ISO 639-3 codes", + ) + + +Creators = Annotated[ + list[Creator], + Field( + min_length=1, + description="The main researchers involved in producing the data, or the authors of the publication in priority order", + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/datacite.py b/metadata/fairspec_metadata/models/datacite/datacite.py new file mode 100644 index 0000000..d492aa2 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/datacite.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from pydantic import Field + +from 
..base import FairspecModel + +from .alternate_identifier import AlternateIdentifiers +from .content_type import ContentTypes +from .contributor import Contributors +from .creator import Creators +from .date import Dates +from .description import Descriptions +from .formats import Formats +from .funding_reference import FundingReferences +from .geo_location import GeoLocations +from .identifier import Doi, DoiPrefix, DoiSuffix +from .language import Language +from .publication_year import PublicationYear +from .publisher import Publisher +from .related_identifier import RelatedIdentifiers +from .related_item import RelatedItems +from .rights import RightsList +from .size import Sizes +from .subject import Subjects +from .title import Titles +from .version import Version + + +class Datacite(FairspecModel): + doi: Doi | None = Field( + default=None, + description="The Digital Object Identifier (DOI) for the resource", + ) + prefix: DoiPrefix | None = Field( + default=None, + description="The DOI prefix for the resource", + ) + suffix: DoiSuffix | None = Field( + default=None, + description="The DOI suffix for the resource", + ) + creators: Creators | None = Field( + default=None, + description="The main researchers involved in producing the data, or the authors of the publication", + ) + titles: Titles | None = Field( + default=None, + description="A name or title by which a resource is known", + ) + publisher: Publisher | None = Field( + default=None, + description="The entity that holds, archives, publishes, prints, distributes, releases, issues, or produces the resource", + ) + publicationYear: PublicationYear | None = Field( + default=None, + description="The year when the data was or will be made publicly available", + ) + subjects: Subjects | None = Field( + default=None, + description="Subject, keywords, classification codes, or key phrases describing the resource", + ) + contributors: Contributors | None = Field( + default=None, + description="The institution 
or person responsible for collecting, managing, distributing, or otherwise contributing to the development of the resource", + ) + dates: Dates | None = Field( + default=None, + description="Different dates relevant to the work", + ) + language: Language | None = Field( + default=None, + description="The primary language of the resource", + ) + types: ContentTypes | None = Field( + default=None, + description="The type of the resource", + ) + alternateIdentifiers: AlternateIdentifiers | None = Field( + default=None, + description="An identifier or identifiers other than the primary Identifier applied to the resource", + ) + relatedIdentifiers: RelatedIdentifiers | None = Field( + default=None, + description="Identifiers of related resources", + ) + sizes: Sizes | None = Field( + default=None, + description="Size information about the resource", + ) + formats: Formats | None = Field( + default=None, + description="Technical format of the resource", + ) + version: Version | None = Field( + default=None, + description="The version number of the resource", + ) + rightsList: RightsList | None = Field( + default=None, + description="Rights information for this resource", + ) + descriptions: Descriptions | None = Field( + default=None, + description="All additional information that does not fit in any of the other categories", + ) + geoLocations: GeoLocations | None = Field( + default=None, + description="Spatial region or named place where the data was gathered or about which the data is focused", + ) + fundingReferences: FundingReferences | None = Field( + default=None, + description="Information about financial support (funding) for the resource", + ) + relatedItems: RelatedItems | None = Field( + default=None, + description="Information about resources related to the one being registered", + ) diff --git a/metadata/fairspec_metadata/models/datacite/date.py b/metadata/fairspec_metadata/models/datacite/date.py new file mode 100644 index 0000000..5183358 --- /dev/null 
+++ b/metadata/fairspec_metadata/models/datacite/date.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import DateType + +DateValue = Annotated[ + str, + Field( + description="Different date formats are supported: YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDThh:mm:ss, YYYY-MM-DDThh:mm:ssTZD, or any of these formats with ranges separated by /" + ), +] + + +class DataciteDate(FairspecModel): + date: DateValue = Field( + description="The date associated with an event in the lifecycle of the resource" + ) + dateType: DateType = Field( + description="The type of date (e.g., Accepted, Available, Created, Issued, Submitted, Updated, etc.)" + ) + dateInformation: str | None = Field( + default=None, + description="Additional information about the date", + ) + + +Dates = Annotated[ + list[DataciteDate], + Field(description="Different dates relevant to the work"), +] diff --git a/metadata/fairspec_metadata/models/datacite/description.py b/metadata/fairspec_metadata/models/datacite/description.py new file mode 100644 index 0000000..87b92aa --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/description.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import DescriptionType + + +class DataciteDescription(FairspecModel): + description: str = Field( + description="All additional information that does not fit in any of the other categories" + ) + descriptionType: DescriptionType = Field( + description="The type of the Description (e.g., Abstract, Methods, TechnicalInfo, etc.)" + ) + lang: str | None = Field( + default=None, + description="Language of the description, specified using ISO 639-1 or ISO 639-3 codes", + ) + + +Descriptions = Annotated[ + list[DataciteDescription], + Field( + description="All additional information that does not 
fit in any of the other categories" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/formats.py b/metadata/fairspec_metadata/models/datacite/formats.py new file mode 100644 index 0000000..724a48d --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/formats.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +Formats = Annotated[ + list[str], + Field( + description="Technical format of the resource (e.g., file format, physical medium, or dimensions of the resource)" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/funding_reference.py b/metadata/fairspec_metadata/models/datacite/funding_reference.py new file mode 100644 index 0000000..37d8998 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/funding_reference.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import FunderIdentifierType + + +class FundingReference(FairspecModel): + funderName: str = Field(description="Name of the funding provider") + funderIdentifier: str | None = Field( + default=None, + description="Uniquely identifies a funding entity, according to various identifier schemes", + ) + funderIdentifierType: FunderIdentifierType | None = Field( + default=None, + description="The type of the funderIdentifier (e.g., ISNI, GRID, Crossref Funder ID, ROR, Other)", + ) + awardNumber: str | None = Field( + default=None, + description="The code assigned by the funder to a sponsored award (grant)", + ) + awardUri: str | None = Field( + default=None, + description="The URI leading to a page provided by the funder for more information about the award (grant)", + ) + awardTitle: str | None = Field( + default=None, + description="The human readable title of the award (grant)", + ) + + +FundingReferences = Annotated[ + list[FundingReference], + Field( + description="Information 
about financial support (funding) for the resource being registered" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/geo_location.py b/metadata/fairspec_metadata/models/datacite/geo_location.py new file mode 100644 index 0000000..d8b926d --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/geo_location.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import Latitude, Longitude + + +class GeoLocationPoint(FairspecModel): + pointLongitude: Longitude = Field(description="Longitudinal dimension of point") + pointLatitude: Latitude = Field(description="Latitudinal dimension of point") + + +class GeoLocationBox(FairspecModel): + westBoundLongitude: Longitude = Field( + description="Western longitudinal dimension of box" + ) + eastBoundLongitude: Longitude = Field( + description="Eastern longitudinal dimension of box" + ) + southBoundLatitude: Latitude = Field( + description="Southern latitudinal dimension of box" + ) + northBoundLatitude: Latitude = Field( + description="Northern latitudinal dimension of box" + ) + + +class GeoLocationPolygonItem(FairspecModel): + polygonPoint: GeoLocationPoint | None = Field( + default=None, + description="A point location in a polygon", + ) + inPolygonPoint: GeoLocationPoint | None = Field( + default=None, + description="For any bound area that is larger than half the earth, define a (random) point inside", + ) + + +class GeoLocation(FairspecModel): + geoLocationPlace: str | None = Field( + default=None, + description="Spatial region or named place where the data was gathered or about which the data is focused", + ) + geoLocationPoint: GeoLocationPoint | None = Field( + default=None, + description="A point location in space", + ) + geoLocationBox: GeoLocationBox | None = Field( + default=None, + description="The spatial limits of a box", + ) + geoLocationPolygon: list[GeoLocationPolygonItem] 
| None = Field( + default=None, + description="A drawn polygon area, defined by a set of points and lines connecting the points in a closed chain", + ) + + +GeoLocations = Annotated[ + list[GeoLocation], + Field( + description="Spatial region or named place where the data was gathered or about which the data is focused" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/identifier.py b/metadata/fairspec_metadata/models/datacite/identifier.py new file mode 100644 index 0000000..d4a73e3 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/identifier.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +Doi = Annotated[ + str, + Field( + pattern=r"^10[.][0-9]{4,9}[/][^\s]+$", + description="The Digital Object Identifier (DOI) is a persistent identifier for the resource, following the DOI syntax", + ), +] + +DoiPrefix = Annotated[ + str, + Field( + pattern=r"^10[.][0-9]{4,9}$", + description="The DOI prefix, which is the part of the DOI before the slash. It uniquely identifies the registrant", + ), +] + +DoiSuffix = Annotated[ + str, + Field( + pattern=r"^[^\s]+$", + description="The DOI suffix, which is the part of the DOI after the slash. It is assigned by the registrant", + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/language.py b/metadata/fairspec_metadata/models/datacite/language.py new file mode 100644 index 0000000..75f93a6 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/language.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +Language = Annotated[ + str, + Field( + description="The primary language of the resource. 
Allowed values are taken from IETF BCP 47, ISO 639-1 language code" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/publication_year.py b/metadata/fairspec_metadata/models/datacite/publication_year.py new file mode 100644 index 0000000..c635b69 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/publication_year.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +PublicationYear = Annotated[ + str, + Field( + pattern=r"^[0-9]{4}$", + description="The year when the data was or will be made publicly available in YYYY format", + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/publisher.py b/metadata/fairspec_metadata/models/datacite/publisher.py new file mode 100644 index 0000000..7b1d0a5 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/publisher.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from pydantic import Field + +from ..base import FairspecModel + + +class Publisher(FairspecModel): + name: str = Field( + description="The name of the entity that holds, archives, publishes, prints, distributes, releases, issues, or produces the resource" + ) + publisherIdentifier: str | None = Field( + default=None, + description="Uniquely identifies the publisher, according to various identifier schemes", + ) + publisherIdentifierScheme: str | None = Field( + default=None, + description="The name of the publisher identifier scheme (e.g., ISNI, ROR, Crossref Funder ID)", + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the publisher identifier scheme", + ) + lang: str | None = Field( + default=None, + description="Language of the publisher name, specified using ISO 639-1 or ISO 639-3 codes", + ) diff --git a/metadata/fairspec_metadata/models/datacite/related_identifier.py b/metadata/fairspec_metadata/models/datacite/related_identifier.py new file mode 100644 index 0000000..ee12524 --- /dev/null +++ 
b/metadata/fairspec_metadata/models/datacite/related_identifier.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from typing import Annotated, Self + +from pydantic import Field, model_validator + +from ..base import FairspecModel + +from .common import ContentTypeGeneral, RelatedIdentifierType, RelationType + + +class RelatedObject(FairspecModel): + relationType: RelationType = Field( + description="Description of the relationship of the resource being registered and the related resource" + ) + relatedMetadataScheme: str | None = Field( + default=None, + description="The name of the scheme (only for HasMetadata/IsMetadataFor relations)", + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the relatedMetadataScheme (only for HasMetadata/IsMetadataFor relations)", + ) + schemeType: str | None = Field( + default=None, + description="The type of the relatedMetadataScheme (only for HasMetadata/IsMetadataFor relations)", + ) + resourceTypeGeneral: ContentTypeGeneral | None = Field( + default=None, + description="The general type of the related resource", + ) + + +class RelatedIdentifier(RelatedObject): + relatedIdentifier: str = Field(description="Identifiers of related resources") + relatedIdentifierType: RelatedIdentifierType = Field( + description="The type of the RelatedIdentifier (e.g., DOI, Handle, URL, etc.)" + ) + + @model_validator(mode="after") + def validate_metadata_fields(self) -> Self: + has_metadata_relation = self.relationType in ( + "HasMetadata", + "IsMetadataFor", + ) + if not has_metadata_relation: + if ( + self.relatedMetadataScheme is not None + or self.schemeUri is not None + or self.schemeType is not None + ): + raise ValueError( + "relatedMetadataScheme, schemeUri, and schemeType are only allowed for HasMetadata/IsMetadataFor relations" + ) + return self + + +RelatedIdentifiers = Annotated[ + list[RelatedIdentifier], + Field(description="Identifiers of related resources"), +] diff --git 
a/metadata/fairspec_metadata/models/datacite/related_item.py b/metadata/fairspec_metadata/models/datacite/related_item.py new file mode 100644 index 0000000..6083103 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/related_item.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from typing import Annotated, Self + +from pydantic import Field, model_validator + +from ..base import FairspecModel + +from .common import ( + ContentTypeGeneral, + NumberType, + RelatedIdentifierType, +) +from .contributor import Contributors +from .creator import Creators +from .publication_year import PublicationYear +from .related_identifier import RelatedObject +from .title import Titles + + +class RelatedItemIdentifier(FairspecModel): + relatedItemIdentifier: str = Field(description="Identifier for the related item") + relatedItemIdentifierType: RelatedIdentifierType = Field( + description="The type of the RelatedItemIdentifier" + ) + + +class RelatedItem(RelatedObject): + relatedItemIdentifier: RelatedItemIdentifier | None = Field( + default=None, + description="Identifiers of related items", + ) + relatedItemType: ContentTypeGeneral = Field( + description="The general type of the related item" + ) + creators: Creators | None = None + contributors: Contributors | None = None + titles: Titles = Field(description="The title(s) of the related item") + publicationYear: PublicationYear | None = None + volume: str | None = Field( + default=None, + description="Volume of the related item", + ) + issue: str | None = Field( + default=None, + description="Issue of the related item", + ) + firstPage: str | None = Field( + default=None, + description="First page of the related item", + ) + lastPage: str | None = Field( + default=None, + description="Last page of the related item", + ) + edition: str | None = Field( + default=None, + description="Edition of the related item", + ) + publisher: str | None = Field( + default=None, + description="Publisher of the related item", 
+ ) + number: str | None = Field( + default=None, + description="Number of the related item (e.g., report number, article number)", + ) + numberType: NumberType | None = Field( + default=None, + description="The type of the number", + ) + + @model_validator(mode="after") + def validate_metadata_fields(self) -> Self: + has_metadata_relation = self.relationType in ( + "HasMetadata", + "IsMetadataFor", + ) + if not has_metadata_relation: + if ( + self.relatedMetadataScheme is not None + or self.schemeUri is not None + or self.schemeType is not None + ): + raise ValueError( + "relatedMetadataScheme, schemeUri, and schemeType are only allowed for HasMetadata/IsMetadataFor relations" + ) + return self + + +RelatedItems = Annotated[ + list[RelatedItem], + Field( + description="Information about a resource related to the one being registered" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/rights.py b/metadata/fairspec_metadata/models/datacite/rights.py new file mode 100644 index 0000000..7644897 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/rights.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + + +class Rights(FairspecModel): + rights: str | None = Field( + default=None, + description="Any rights information for this resource", + ) + rightsUri: str | None = Field( + default=None, + description="The URI of the license", + ) + rightsIdentifier: str | None = Field( + default=None, + description="A short, standardized version of the license name", + ) + rightsIdentifierScheme: str | None = Field( + default=None, + description="The name of the scheme (e.g., SPDX)", + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the rightsIdentifierScheme", + ) + lang: str | None = Field( + default=None, + description="Language of the rights statement, specified using ISO 639-1 or ISO 639-3 codes", + ) + + +RightsList = 
Annotated[ + list[Rights], + Field(description="Any rights information for this resource"), +] diff --git a/metadata/fairspec_metadata/models/datacite/size.py b/metadata/fairspec_metadata/models/datacite/size.py new file mode 100644 index 0000000..5fd46bd --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/size.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +Sizes = Annotated[ + list[str], + Field( + description="Unstructured size information about the resource (e.g., '15 pages', '6 MB')" + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/subject.py b/metadata/fairspec_metadata/models/datacite/subject.py new file mode 100644 index 0000000..f8db982 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/subject.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + + +class Subject(FairspecModel): + subject: str = Field( + description="Subject, keyword, classification code, or key phrase describing the resource" + ) + subjectScheme: str | None = Field( + default=None, + description="The name of the subject scheme or classification code or authority if one is used", + ) + schemeUri: str | None = Field( + default=None, + description="The URI of the subject identifier scheme", + ) + valueUri: str | None = Field( + default=None, + description="The URI of the subject term", + ) + classificationCode: str | None = Field( + default=None, + description="The classification code used for the subject term in the subject scheme", + ) + lang: str | None = Field( + default=None, + description="Language of the subject, specified using ISO 639-1 or ISO 639-3 codes", + ) + + +Subjects = Annotated[ + list[Subject], + Field( + description="Subject, keyword, classification code, or key phrase describing the resource" + ), +] diff --git 
a/metadata/fairspec_metadata/models/datacite/title.py b/metadata/fairspec_metadata/models/datacite/title.py new file mode 100644 index 0000000..3fefceb --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/title.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from ..base import FairspecModel + +from .common import TitleType + + +class Title(FairspecModel): + title: str = Field(description="A name or title by which a resource is known") + titleType: TitleType | None = Field( + default=None, + description="The type of title (e.g., AlternativeTitle, Subtitle, TranslatedTitle, Other)", + ) + lang: str | None = Field( + default=None, + description="Language of the title, specified using ISO 639-1 or ISO 639-3 codes", + ) + + +Titles = Annotated[ + list[Title], + Field( + min_length=1, + description="A name or title by which a resource is known", + ), +] diff --git a/metadata/fairspec_metadata/models/datacite/version.py b/metadata/fairspec_metadata/models/datacite/version.py new file mode 100644 index 0000000..3dbbd57 --- /dev/null +++ b/metadata/fairspec_metadata/models/datacite/version.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +Version = Annotated[ + str, + Field( + description="The version number of the resource. 
Suggested practice: track major_version.minor_version" + ), +] diff --git a/metadata/fairspec_metadata/models/dataset.py b/metadata/fairspec_metadata/models/dataset.py new file mode 100644 index 0000000..ff3a239 --- /dev/null +++ b/metadata/fairspec_metadata/models/dataset.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from pydantic import ConfigDict, Field + +from .base import FairspecModel + +from .datacite.datacite import Datacite +from .resource import Resource + + +class Dataset(Datacite): + model_config = ConfigDict(populate_by_name=True) + + profile: str | None = Field( + default=None, + alias="$schema", + description="Fairspec Dataset profile url.", + ) + resources: list[Resource] | None = Field( + default=None, + description="A list of resources. Each item must be a Resource object describing data files or inline data.", + ) + + +class RenderDatasetOptions(FairspecModel): + format: str + + +class ConvertDatasetToOptions(FairspecModel): + format: str + + +class ConvertDatasetFromOptions(FairspecModel): + format: str diff --git a/metadata/fairspec_metadata/models/descriptor.py b/metadata/fairspec_metadata/models/descriptor.py new file mode 100644 index 0000000..ee281a8 --- /dev/null +++ b/metadata/fairspec_metadata/models/descriptor.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from typing import Any + +Descriptor = dict[str, Any] diff --git a/metadata/fairspec_metadata/models/error/base.py b/metadata/fairspec_metadata/models/error/base.py new file mode 100644 index 0000000..7fa44e1 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/base.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from pydantic import Field + +from ..base import FairspecModel + + +class BaseError(FairspecModel): + type: str = Field(description="Error type identifier") + resourceName: str | None = Field( + default=None, + description="Name of the resource if available", + ) diff --git a/metadata/fairspec_metadata/models/error/cell.py 
b/metadata/fairspec_metadata/models/error/cell.py new file mode 100644 index 0000000..0e8b28d --- /dev/null +++ b/metadata/fairspec_metadata/models/error/cell.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from typing import Annotated, Literal, Union + +from pydantic import Field + +from fairspec_metadata.models.column.column import ColumnType +from .base import BaseError + + +class BaseCellError(BaseError): + columnName: str = Field(description="The name of the column") + rowNumber: float = Field(description="The row number where the error occurred") + cell: str = Field(description="The cell value that caused the error") + + +class CellTypeError(BaseCellError): + type: Literal["cell/type"] = Field(description="Error type identifier") + columnType: ColumnType = Field(description="The expected column type") + + +class CellMissingError(BaseCellError): + type: Literal["cell/missing"] = Field(description="Error type identifier") + + +class CellMinimumError(BaseCellError): + type: Literal["cell/minimum"] = Field(description="Error type identifier") + minimum: str = Field(description="The minimum value allowed") + + +class CellMaximumError(BaseCellError): + type: Literal["cell/maximum"] = Field(description="Error type identifier") + maximum: str = Field(description="The maximum value allowed") + + +class CellExclusiveMinimumError(BaseCellError): + type: Literal["cell/exclusiveMinimum"] = Field(description="Error type identifier") + minimum: str = Field(description="The exclusive minimum value") + + +class CellExclusiveMaximumError(BaseCellError): + type: Literal["cell/exclusiveMaximum"] = Field(description="Error type identifier") + maximum: str = Field(description="The exclusive maximum value") + + +class CellMultipleOfError(BaseCellError): + type: Literal["cell/multipleOf"] = Field(description="Error type identifier") + multipleOf: float = Field(description="The multiple of constraint") + + +class CellMinLengthError(BaseCellError): + type: 
Literal["cell/minLength"] = Field(description="Error type identifier") + minLength: float = Field(description="The minimum length required") + + +class CellMaxLengthError(BaseCellError): + type: Literal["cell/maxLength"] = Field(description="Error type identifier") + maxLength: float = Field(description="The maximum length allowed") + + +class CellPatternError(BaseCellError): + type: Literal["cell/pattern"] = Field(description="Error type identifier") + pattern: str = Field(description="The pattern that must be matched") + + +class CellUniqueError(BaseCellError): + type: Literal["cell/unique"] = Field(description="Error type identifier") + + +class CellConstError(BaseCellError): + type: Literal["cell/const"] = Field(description="Error type identifier") + const: str = Field(description="The allowed value") + + +class CellEnumError(BaseCellError): + type: Literal["cell/enum"] = Field(description="Error type identifier") + enum: list[str] = Field(description="The allowed enumeration values") + + +class CellJsonError(BaseCellError): + type: Literal["cell/json"] = Field(description="Error type identifier") + message: str = Field(description="The JSON schema validation error message") + jsonPointer: str = Field( + description="JSON Pointer to the validation error location" + ) + + +class CellMinItemsError(BaseCellError): + type: Literal["cell/minItems"] = Field(description="Error type identifier") + minItems: float = Field(description="The minimum number of items required") + + +class CellMaxItemsError(BaseCellError): + type: Literal["cell/maxItems"] = Field(description="Error type identifier") + maxItems: float = Field(description="The maximum number of items allowed") + + +CellError = Annotated[ + Union[ + CellTypeError, + CellMissingError, + CellMinimumError, + CellMaximumError, + CellExclusiveMinimumError, + CellExclusiveMaximumError, + CellMultipleOfError, + CellMinLengthError, + CellMaxLengthError, + CellMinItemsError, + CellMaxItemsError, + CellPatternError, + 
CellUniqueError, + CellConstError, + CellEnumError, + CellJsonError, + ], + Field(discriminator="type"), +] diff --git a/metadata/fairspec_metadata/models/error/column.py b/metadata/fairspec_metadata/models/error/column.py new file mode 100644 index 0000000..57bd2c0 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/column.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Annotated, Literal, Union + +from pydantic import Field + +from fairspec_metadata.models.column.column import ColumnType +from .base import BaseError + + +class ColumnMissingError(BaseError): + type: Literal["column/missing"] = Field(description="Error type identifier") + columnName: str = Field(description="The name of the missing column") + + +class ColumnTypeError(BaseError): + type: Literal["column/type"] = Field(description="Error type identifier") + columnName: str = Field(description="The name of the column") + expectedColumnType: ColumnType = Field( + description="The column type that was expected" + ) + actualColumnType: ColumnType = Field(description="The actual column type found") + + +ColumnError = Annotated[ + Union[ColumnMissingError, ColumnTypeError], + Field(discriminator="type"), +] diff --git a/metadata/fairspec_metadata/models/error/data.py b/metadata/fairspec_metadata/models/error/data.py new file mode 100644 index 0000000..d73f751 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/data.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseError + + +class DataError(BaseError): + type: Literal["data"] = Field(description="Error type identifier") + message: str = Field(description="The JSON parsing error message") + jsonPointer: str = Field(description="JSON Pointer to the location of the error") diff --git a/metadata/fairspec_metadata/models/error/error.py b/metadata/fairspec_metadata/models/error/error.py new file mode 100644 index 0000000..0910d06 ---
/dev/null +++ b/metadata/fairspec_metadata/models/error/error.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from typing import Union + +from .data import DataError +from .file import FileError +from .metadata import MetadataError +from .resource import ResourceError +from .table import TableError + +FairspecError = Union[MetadataError, ResourceError, TableError, DataError, FileError] diff --git a/metadata/fairspec_metadata/models/error/file.py b/metadata/fairspec_metadata/models/error/file.py new file mode 100644 index 0000000..2441ef2 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/file.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Annotated, Literal, Union + +from pydantic import Field + +from .base import BaseError + + +class TextualError(BaseError): + type: Literal["file/textual"] = Field(description="Error type identifier") + actualEncoding: str | None = Field( + default=None, + description="The actual encoding format found", + ) + + +class IntegrityError(BaseError): + type: Literal["file/integrity"] = Field(description="Error type identifier") + hashType: str = Field(description="The type of hash algorithm used") + expectedHash: str = Field(description="The expected hash value") + actualHash: str = Field(description="The actual hash value found") + + +FileError = Annotated[ + Union[TextualError, IntegrityError], + Field(discriminator="type"), +] diff --git a/metadata/fairspec_metadata/models/error/foreign_key.py b/metadata/fairspec_metadata/models/error/foreign_key.py new file mode 100644 index 0000000..c1c4e58 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/foreign_key.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from fairspec_metadata.models.foreign_key import ForeignKey +from .base import BaseError + + +class ForeignKeyError(BaseError): + type: Literal["foreignKey"] = Field(description="Error type identifier") + 
foreignKey: ForeignKey = Field( + description="The foreign key constraint that was violated" + ) + cells: list[str] = Field( + description="The cells that violate the foreign key constraint" + ) diff --git a/metadata/fairspec_metadata/models/error/metadata.py b/metadata/fairspec_metadata/models/error/metadata.py new file mode 100644 index 0000000..807bc66 --- /dev/null +++ b/metadata/fairspec_metadata/models/error/metadata.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from .base import BaseError + + +class MetadataError(BaseError): + type: Literal["metadata"] = Field(description="Error type identifier") + message: str = Field(description="The JSON parsing error message") + jsonPointer: str = Field(description="JSON Pointer to the location of the error") diff --git a/metadata/fairspec_metadata/models/error/resource.py b/metadata/fairspec_metadata/models/error/resource.py new file mode 100644 index 0000000..0f0e99d --- /dev/null +++ b/metadata/fairspec_metadata/models/error/resource.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import Annotated, Literal, Union + +from pydantic import Field + +from .base import BaseError + + +class ResourceMissingError(BaseError): + type: Literal["resource/missing"] = Field(description="Error type identifier") + referencingResourceName: str = Field( + description="The name of the referencing resource" + ) + + +class ResourceTypeError(BaseError): + type: Literal["resource/type"] = Field(description="Error type identifier") + expectedResourceType: Literal["data", "table"] = Field( + description="The expected resource type" + ) + referencingResourceName: str | None = Field( + default=None, + description="The name of the referencing resource", + ) + + +ResourceError = Annotated[ + Union[ResourceMissingError, ResourceTypeError], + Field(discriminator="type"), +] diff --git a/metadata/fairspec_metadata/models/error/row.py
b/metadata/fairspec_metadata/models/error/row.py new file mode 100644 index 0000000..7aaa9dd --- /dev/null +++ b/metadata/fairspec_metadata/models/error/row.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Annotated, Literal, Union + +from pydantic import Field + +from .base import BaseError + + +class RowPrimaryKeyError(BaseError): + type: Literal["row/primaryKey"] = Field(description="Error type identifier") + rowNumber: float = Field(description="The row number where the error occurred") + columnNames: list[str] = Field( + description="Column names involved in the primary key constraint violation" + ) + + +class RowUniqueKeyError(BaseError): + type: Literal["row/uniqueKey"] = Field(description="Error type identifier") + rowNumber: float = Field(description="The row number where the error occurred") + columnNames: list[str] = Field( + description="Column names involved in the unique key constraint violation" + ) + + +RowError = Annotated[ + Union[RowPrimaryKeyError, RowUniqueKeyError], + Field(discriminator="type"), +] diff --git a/metadata/fairspec_metadata/models/error/table.py b/metadata/fairspec_metadata/models/error/table.py new file mode 100644 index 0000000..189e0dd --- /dev/null +++ b/metadata/fairspec_metadata/models/error/table.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from typing import Union + +from .cell import CellError +from .column import ColumnError +from .foreign_key import ForeignKeyError +from .row import RowError + +TableError = Union[ColumnError, RowError, CellError, ForeignKeyError] diff --git a/metadata/fairspec_metadata/models/exception.py b/metadata/fairspec_metadata/models/exception.py new file mode 100644 index 0000000..53219e8 --- /dev/null +++ b/metadata/fairspec_metadata/models/exception.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .report import Report + + +class FairspecException(Exception): + report: Report | 
None + + def __init__(self, message: str, *, report: Report | None = None) -> None: + super().__init__(message) + self.report = report diff --git a/metadata/fairspec_metadata/models/file_dialect/arrow.py b/metadata/fairspec_metadata/models/file_dialect/arrow.py new file mode 100644 index 0000000..c20c8fc --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/arrow.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect + + +class ArrowFileDialect(BaseFileDialect): + format: Literal["arrow"] = "arrow" diff --git a/metadata/fairspec_metadata/models/file_dialect/base.py b/metadata/fairspec_metadata/models/file_dialect/base.py new file mode 100644 index 0000000..c064ce7 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/base.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from pydantic import ConfigDict, Field + +from ..base import FairspecModel + + +class BaseFileDialect(FairspecModel): + model_config = ConfigDict(populate_by_name=True) + + profile: str | None = Field( + default=None, + alias="$schema", + description="Fairspec Dialect profile url.", + ) + title: str | None = Field( + default=None, + description="An optional human-readable title of the format", + ) + description: str | None = Field( + default=None, + description="An optional detailed description of the format", + ) diff --git a/metadata/fairspec_metadata/models/file_dialect/common.py b/metadata/fairspec_metadata/models/file_dialect/common.py new file mode 100644 index 0000000..a08c729 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/common.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from enum import StrEnum +from typing import Literal + +HeaderRows = Literal[False] | list[int] + +Delimiter = str +LineTerminator = str +QuoteChar = str +NullSequence = str +HeaderJoin = str +CommentRows = list[int] +CommentPrefix = str +ColumnNames = list[str] +JsonPointer = str 
+SheetNumber = int +SheetName = str +TableName = str + + +class RowType(StrEnum): + array = "array" + object = "object" diff --git a/metadata/fairspec_metadata/models/file_dialect/csv.py b/metadata/fairspec_metadata/models/file_dialect/csv.py new file mode 100644 index 0000000..6e2305c --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/csv.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + Delimiter, + HeaderJoin, + HeaderRows, + LineTerminator, + NullSequence, + QuoteChar, +) + + +class CsvFileDialect(BaseFileDialect): + format: Literal["csv"] = "csv" + delimiter: Delimiter | None = None + lineTerminator: LineTerminator | None = None + quoteChar: QuoteChar | None = None + nullSequence: NullSequence | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/file_dialect.py b/metadata/fairspec_metadata/models/file_dialect/file_dialect.py new file mode 100644 index 0000000..3530e0d --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/file_dialect.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Annotated, Union + +from pydantic import Field + +from .arrow import ArrowFileDialect +from .csv import CsvFileDialect +from .json import JsonFileDialect +from .jsonl import JsonlFileDialect +from .ods import OdsFileDialect +from .parquet import ParquetFileDialect +from .sqlite import SqliteFileDialect +from .tsv import TsvFileDialect +from .xlsx import XlsxFileDialect + +FileDialect = Annotated[ + Union[ + CsvFileDialect, + TsvFileDialect, + JsonFileDialect, + JsonlFileDialect, + XlsxFileDialect, + OdsFileDialect, + SqliteFileDialect, + ParquetFileDialect, + 
ArrowFileDialect, + ], + Field(discriminator="format"), +] diff --git a/metadata/fairspec_metadata/models/file_dialect/json.py b/metadata/fairspec_metadata/models/file_dialect/json.py new file mode 100644 index 0000000..07f8224 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/json.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + HeaderJoin, + HeaderRows, + JsonPointer, + RowType, +) + + +class JsonFileDialect(BaseFileDialect): + format: Literal["json"] = "json" + jsonPointer: JsonPointer | None = None + rowType: RowType | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/jsonl.py b/metadata/fairspec_metadata/models/file_dialect/jsonl.py new file mode 100644 index 0000000..2f6ba87 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/jsonl.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + HeaderJoin, + HeaderRows, + RowType, +) + + +class JsonlFileDialect(BaseFileDialect): + format: Literal["jsonl"] = "jsonl" + rowType: RowType | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/ods.py b/metadata/fairspec_metadata/models/file_dialect/ods.py new file mode 100644 index 0000000..63c6129 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/ods.py @@ -0,0 +1,25 @@ +from __future__ import 
annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + HeaderJoin, + HeaderRows, + SheetName, + SheetNumber, +) + + +class OdsFileDialect(BaseFileDialect): + format: Literal["ods"] = "ods" + sheetName: SheetName | None = None + sheetNumber: SheetNumber | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/parquet.py b/metadata/fairspec_metadata/models/file_dialect/parquet.py new file mode 100644 index 0000000..9249d14 --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/parquet.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect + + +class ParquetFileDialect(BaseFileDialect): + format: Literal["parquet"] = "parquet" diff --git a/metadata/fairspec_metadata/models/file_dialect/sqlite.py b/metadata/fairspec_metadata/models/file_dialect/sqlite.py new file mode 100644 index 0000000..0f6a2dd --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/sqlite.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import TableName + + +class SqliteFileDialect(BaseFileDialect): + format: Literal["sqlite"] = "sqlite" + tableName: TableName | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/tsv.py b/metadata/fairspec_metadata/models/file_dialect/tsv.py new file mode 100644 index 0000000..153ca2b --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/tsv.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + HeaderJoin, + 
HeaderRows, + LineTerminator, + NullSequence, +) + + +class TsvFileDialect(BaseFileDialect): + format: Literal["tsv"] = "tsv" + lineTerminator: LineTerminator | None = None + nullSequence: NullSequence | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/unknown.py b/metadata/fairspec_metadata/models/file_dialect/unknown.py new file mode 100644 index 0000000..ef0bb6d --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/unknown.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .base import BaseFileDialect + + +class UnknownFileDialect(BaseFileDialect): + format: str | None = None diff --git a/metadata/fairspec_metadata/models/file_dialect/xlsx.py b/metadata/fairspec_metadata/models/file_dialect/xlsx.py new file mode 100644 index 0000000..8809a6c --- /dev/null +++ b/metadata/fairspec_metadata/models/file_dialect/xlsx.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import Literal + +from .base import BaseFileDialect +from .common import ( + ColumnNames, + CommentPrefix, + CommentRows, + HeaderJoin, + HeaderRows, + SheetName, + SheetNumber, +) + + +class XlsxFileDialect(BaseFileDialect): + format: Literal["xlsx"] = "xlsx" + sheetName: SheetName | None = None + sheetNumber: SheetNumber | None = None + headerRows: HeaderRows | None = None + headerJoin: HeaderJoin | None = None + commentRows: CommentRows | None = None + commentPrefix: CommentPrefix | None = None + columnNames: ColumnNames | None = None diff --git a/metadata/fairspec_metadata/models/foreign_key.py b/metadata/fairspec_metadata/models/foreign_key.py new file mode 100644 index 0000000..793d0f7 --- /dev/null +++ b/metadata/fairspec_metadata/models/foreign_key.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from pydantic import 
Field + +from .base import FairspecModel + + +class ForeignKeyReference(FairspecModel): + resource: str | None = Field( + default=None, + description="Target resource name (optional, omit for self-reference)", + ) + columns: list[str] = Field( + description="Target column(s) in the referenced resource" + ) + + +class ForeignKey(FairspecModel): + columns: list[str] = Field(description="Source column(s) in this table") + reference: ForeignKeyReference = Field( + description="Reference to columns in another resource" + ) diff --git a/metadata/fairspec_metadata/models/integrity.py b/metadata/fairspec_metadata/models/integrity.py new file mode 100644 index 0000000..11c6bff --- /dev/null +++ b/metadata/fairspec_metadata/models/integrity.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from enum import StrEnum + +from .base import FairspecModel + + +class IntegrityType(StrEnum): + md5 = "md5" + sha1 = "sha1" + sha256 = "sha256" + sha512 = "sha512" + + +class Integrity(FairspecModel): + type: IntegrityType + hash: str diff --git a/metadata/fairspec_metadata/models/json_schema.py b/metadata/fairspec_metadata/models/json_schema.py new file mode 100644 index 0000000..c5d152f --- /dev/null +++ b/metadata/fairspec_metadata/models/json_schema.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from typing import Any + +JsonSchema = dict[str, Any] diff --git a/metadata/fairspec_metadata/models/path.py b/metadata/fairspec_metadata/models/path.py new file mode 100644 index 0000000..2fd6792 --- /dev/null +++ b/metadata/fairspec_metadata/models/path.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +InternalPath = Annotated[ + str, + Field( + pattern=r"^[^./~\\:][^./\\:]*(\.[^./\\:]+)*\.?(/([^./\\:]+(\.[^./\\:]+)*\.?|\.([^./\\:]+(\.[^./\\:]+)*\.?)?))*$" + ), +] + +ExternalPath = Annotated[str, Field(pattern=r"^https?://")] + +Path = InternalPath | ExternalPath diff --git 
a/metadata/fairspec_metadata/models/profile.py b/metadata/fairspec_metadata/models/profile.py new file mode 100644 index 0000000..47d1452 --- /dev/null +++ b/metadata/fairspec_metadata/models/profile.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from enum import StrEnum + +from .base import FairspecModel + +from .json_schema import JsonSchema + +Profile = JsonSchema + + +class ProfileType(StrEnum): + catalog = "catalog" + dataset = "dataset" + file_dialect = "file-dialect" + data_schema = "data-schema" + table_schema = "table-schema" + + +class ProfileRegistryItem(FairspecModel): + type: ProfileType + path: str + version: str + profile: Profile + + +ProfileRegistry = list[ProfileRegistryItem] diff --git a/metadata/fairspec_metadata/models/report.py b/metadata/fairspec_metadata/models/report.py new file mode 100644 index 0000000..d5b95b1 --- /dev/null +++ b/metadata/fairspec_metadata/models/report.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from pydantic import Field + +from .base import FairspecModel +from .error.error import FairspecError + + +class Report(FairspecModel): + valid: bool = Field(description="Whether the validation passed without errors") + errors: list[FairspecError] = Field( + description="Array of validation errors encountered" + ) diff --git a/metadata/fairspec_metadata/models/resource.py b/metadata/fairspec_metadata/models/resource.py new file mode 100644 index 0000000..a950b63 --- /dev/null +++ b/metadata/fairspec_metadata/models/resource.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from typing import Any, Union + +from pydantic import Field + +from .data import Data +from .data_schema import DataSchema +from .datacite.datacite import Datacite +from .file_dialect.file_dialect import FileDialect +from .integrity import Integrity +from .path import Path +from .table_schema import TableSchema + + +class Resource(Datacite): + data: Data | None = Field( + default=None, + description="Data or content of the 
resource. It must be a path to a file, array of paths to files, inline JSON object, or inline JSON array of objects.", + ) + name: str | None = Field( + default=None, + pattern=r"^[a-zA-Z0-9_]+$", + description="An optional name for the resource consisting of alphanumeric characters and underscores. If provided, it can be used to reference resource within a dataset context.", + ) + textual: bool | None = Field( + default=None, + description="Whether the resource is text-based.", + ) + integrity: Integrity | None = Field( + default=None, + description="The integrity check of the file with type (md5, sha1, sha256, sha512) and hash value.", + ) + fileDialect: Union[Path, FileDialect] | None = Field( + default=None, + description="A path to dialect or an object with the dialect of the file. For multiple files the format property defines the dialect for all the files.", + ) + dataSchema: Union[Path, DataSchema] | None = Field( + default=None, + description="A path to a JSON Schema or an object with the JSON Schema. The JSON Schema must be compatible with the JSONSchema Draft 2020-12 specification.", + ) + tableSchema: Union[Path, TableSchema] | None = Field( + default=None, + description="A path to a Table Schema or an object with the Table Schema. 
The Table Schema must be compatible with the Fairspec Table specification.", + ) + unstable_customMetadata: dict[str, Any] | None = Field( + default=None, + description="Custom properties for extending resources", + ) diff --git a/metadata/fairspec_metadata/models/table_schema.py b/metadata/fairspec_metadata/models/table_schema.py new file mode 100644 index 0000000..031ef6d --- /dev/null +++ b/metadata/fairspec_metadata/models/table_schema.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from typing import Union + +from pydantic import ConfigDict, Field + +from .base import FairspecModel + +from .column.column import ColumnProperty +from .foreign_key import ForeignKey +from .unique_key import UniqueKey + + +class TableSchemaMissingValueItem(FairspecModel): + value: str | int | float + label: str + + +class TableSchema(FairspecModel): + model_config = ConfigDict(populate_by_name=True) + + profile: str | None = Field( + default=None, + alias="$schema", + description="Fairspec Schema profile url.", + ) + title: str | None = Field( + default=None, + description="A human-readable title of the table schema", + ) + description: str | None = Field( + default=None, + description="A human-readable description of the table schema", + ) + required: list[str] | None = Field( + default=None, + description="An optional list of column names that must be present", + ) + allRequired: bool | None = Field( + default=None, + description="An optional boolean indicating whether all columns are required", + ) + properties: dict[str, ColumnProperty] | None = Field( + default=None, + description="An object defining the schema for table columns, where each key is a column name", + ) + missingValues: list[Union[str, int, float, TableSchemaMissingValueItem]] | None = ( + Field( + default=None, + description="An optional list of values that represent missing or null data across all columns", + ) + ) + primaryKey: list[str] | None = Field( + default=None, + min_length=1, + 
description="An optional array of column names that form the table's primary key", + ) + uniqueKeys: list[UniqueKey] | None = Field( + default=None, + min_length=1, + description="An optional array of unique key constraints", + ) + foreignKeys: list[ForeignKey] | None = Field( + default=None, + min_length=1, + description="An optional array of foreign key constraints", + ) + + +class RenderTableSchemaOptions(FairspecModel): + format: str + + +class ConvertTableSchemaToOptions(FairspecModel): + format: str + + +class ConvertTableSchemaFromOptions(FairspecModel): + format: str diff --git a/metadata/fairspec_metadata/models/unique_key.py b/metadata/fairspec_metadata/models/unique_key.py new file mode 100644 index 0000000..430c274 --- /dev/null +++ b/metadata/fairspec_metadata/models/unique_key.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +UniqueKey = Annotated[ + list[str], + Field( + min_length=1, + description="An array of column names whose combined values must be unique", + ), +] diff --git a/metadata/fairspec_metadata/plugin.py b/metadata/fairspec_metadata/plugin.py new file mode 100644 index 0000000..a240bc7 --- /dev/null +++ b/metadata/fairspec_metadata/plugin.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .models.data_schema import DataSchema, RenderDataSchemaOptions + from .models.dataset import ( + ConvertDatasetFromOptions, + ConvertDatasetToOptions, + Dataset, + RenderDatasetOptions, + ) + from .models.descriptor import Descriptor + from .models.table_schema import ( + ConvertTableSchemaFromOptions, + ConvertTableSchemaToOptions, + RenderTableSchemaOptions, + TableSchema, + ) + + +class MetadataPlugin: + def render_dataset_as( + self, dataset: Dataset, options: RenderDatasetOptions + ) -> str | None: + return None + + def convert_dataset_to( + self, dataset: Dataset, options: ConvertDatasetToOptions + ) -> 
Descriptor | None: + return None + + def convert_dataset_from( + self, descriptor: Descriptor, options: ConvertDatasetFromOptions + ) -> Dataset | None: + return None + + def render_data_schema_as( + self, data_schema: DataSchema, options: RenderDataSchemaOptions + ) -> str | None: + return None + + def render_table_schema_as( + self, table_schema: TableSchema, options: RenderTableSchemaOptions + ) -> str | None: + return None + + def convert_table_schema_to( + self, table_schema: TableSchema, options: ConvertTableSchemaToOptions + ) -> Descriptor | None: + return None + + def convert_table_schema_from( + self, descriptor: Descriptor, options: ConvertTableSchemaFromOptions + ) -> TableSchema | None: + return None diff --git a/metadata/fairspec_metadata/profiles/catalog.json b/metadata/fairspec_metadata/profiles/catalog.json new file mode 100644 index 0000000..c603fec --- /dev/null +++ b/metadata/fairspec_metadata/profiles/catalog.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{ "$ref": "#/$defs/Catalog" }], + "$defs": { + "Catalog": { + "type": "array", + "items": { "$ref": "#/$defs/Dataset" } + }, + "Dataset": { + "type": "object", + "required": ["loc", "upd"], + "additionalProperties": false, + "properties": { + "loc": { "$ref": "#/$defs/Location" }, + "upd": { "$ref": "#/$defs/Updated" } + } + }, + "Location": { + "type": "string", + "format": "uri" + }, + "Updated": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/metadata/fairspec_metadata/profiles/data-schema.json b/metadata/fairspec_metadata/profiles/data-schema.json new file mode 100644 index 0000000..0744c46 --- /dev/null +++ b/metadata/fairspec_metadata/profiles/data-schema.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{ "$ref": "#/$defs/DataSchema" }], + "$defs": { + "DataSchema": { + "type": "object", + "allOf": [{ "$ref": "#/$defs/JsonSchema" }], + "properties": { + "$schema": { 
"$ref": "#/$defs/Profile" } + } + }, + "JsonSchema": { + "$ref": "https://json-schema.org/draft/2020-12/schema" + }, + "Profile": { + "$ref": "#/$defs/ExternalPath" + }, + "ExternalPath": { + "type": "string", + "pattern": "^https?://" + } + } +} diff --git a/metadata/fairspec_metadata/profiles/dataset.json b/metadata/fairspec_metadata/profiles/dataset.json new file mode 100644 index 0000000..1268965 --- /dev/null +++ b/metadata/fairspec_metadata/profiles/dataset.json @@ -0,0 +1,710 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{ "$ref": "#/$defs/Dataset" }], + "$defs": { + "Dataset": { + "allOf": [{ "$ref": "#/$defs/Datacite" }], + "properties": { + "$schema": { "$ref": "#/$defs/Profile" }, + "resources": { "$ref": "#/$defs/Resources" } + } + }, + "Profile": { + "$ref": "#/$defs/ExternalPath" + }, + "Resources": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/Resource" } + }, + "Resource": { + "allOf": [{ "$ref": "#/$defs/Datacite" }], + "properties": { + "name": { "$ref": "#/$defs/Name" }, + "data": { "$ref": "#/$defs/Data" }, + "textual": { "$ref": "#/$defs/Textual" }, + "integrity": { "$ref": "#/$defs/Integrity" }, + "fileDialect": { "$ref": "#/$defs/FileDialect" }, + "dataSchema": { "$ref": "#/$defs/DataSchema" }, + "tableSchema": { "$ref": "#/$defs/TableSchema" } + } + }, + "Name": { + "type": "string", + "pattern": "^[a-zA-Z0-9_]+$" + }, + "Data": { + "oneOf": [{ "$ref": "#/$defs/PathData" }, { "$ref": "#/$defs/JsonData" }] + }, + "PathData": { + "oneOf": [ + { "$ref": "#/$defs/Path" }, + { "type": "array", "items": { "$ref": "#/$defs/Path" } } + ] + }, + "JsonData": { + "oneOf": [ + { + "type": "object" + }, + { + "type": "array", + "items": { "type": "object" } + } + ] + }, + "Path": { + "oneOf": [ + { "$ref": "#/$defs/InternalPath" }, + { "$ref": "#/$defs/ExternalPath" } + ] + }, + "InternalPath": { + "type": "string", + "pattern": 
"^(?![./~])(?!.*://)(?!.*\\.\\.)(?!.*\\\\)(?!.*:)[^/\\\\]+(/[^/\\\\]+)*$" + }, + "ExternalPath": { + "type": "string", + "pattern": "^https?://" + }, + "Textual": { + "type": "boolean" + }, + "FileDialect": { + "oneOf": [ + { "$ref": "#/$defs/Path" }, + { "$ref": "https://fairspec.org/profiles/latest/file-dialect.json" } + ] + }, + "Integrity": { + "type": "string", + "properties": { + "type": { + "enum": ["md5", "sha1", "sha256", "sha512"] + }, + "hash": { "type": "string" } + } + }, + "DataSchema": { + "oneOf": [ + { "$ref": "#/$defs/Path" }, + { "$ref": "https://fairspec.org/profiles/latest/data-schema.json" } + ] + }, + "TableSchema": { + "oneOf": [ + { "$ref": "#/$defs/Path" }, + { "$ref": "https://fairspec.org/profiles/latest/table-schema.json" } + ] + }, + "Datacite": { + "type": "object", + "properties": { + "doi": { "$ref": "#/$defs/Doi" }, + "prefix": { "$ref": "#/$defs/DoiPrefix" }, + "suffix": { "$ref": "#/$defs/DoiSuffix" }, + "creators": { "$ref": "#/$defs/Creators" }, + "titles": { "$ref": "#/$defs/Titles" }, + "publisher": { "$ref": "#/$defs/Publisher" }, + "publicationYear": { "$ref": "#/$defs/PublicationYear" }, + "subjects": { "$ref": "#/$defs/Subjects" }, + "contributors": { "$ref": "#/$defs/Contributors" }, + "dates": { "$ref": "#/$defs/Dates" }, + "language": { "$ref": "#/$defs/Language" }, + "types": { "$ref": "#/$defs/ContentTypes" }, + "alternateIdentifiers": { "$ref": "#/$defs/AlternateIdentifiers" }, + "relatedIdentifiers": { "$ref": "#/$defs/RelatedIdentifiers" }, + "sizes": { "$ref": "#/$defs/Sizes" }, + "formats": { "$ref": "#/$defs/Formats" }, + "version": { "$ref": "#/$defs/Version" }, + "rightsList": { "$ref": "#/$defs/RightsList" }, + "descriptions": { "$ref": "#/$defs/Descriptions" }, + "geoLocations": { "$ref": "#/$defs/GeoLocations" }, + "fundingReferences": { "$ref": "#/$defs/FundingReferences" }, + "relatedItems": { "$ref": "#/$defs/RelatedItems" } + } + }, + "Doi": { + "type": "string", + "pattern": 
"^10[.][0-9]{4,9}[/][^\\s]+$" + }, + "DoiPrefix": { + "type": "string", + "pattern": "^10[.][0-9]{4,9}$" + }, + "DoiSuffix": { + "type": "string", + "pattern": "^[^\\s]+$" + }, + "Creators": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/Creator" } + }, + "Creator": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" }, + "nameType": { "$ref": "#/$defs/CreatorNameType" }, + "givenName": { "type": "string" }, + "familyName": { "type": "string" }, + "nameIdentifiers": { "$ref": "#/$defs/CreatorNameIdentifiers" }, + "affiliation": { "$ref": "#/$defs/CreatorAffiliations" }, + "lang": { "type": "string" } + } + }, + "CreatorNameType": { + "enum": ["Organizational", "Personal"] + }, + "CreatorNameIdentifiers": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "nameIdentifier": { "type": "string" }, + "nameIdentifierScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" } + }, + "required": ["nameIdentifier", "nameIdentifierScheme"] + }, + "uniqueItems": true + }, + "CreatorAffiliations": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "$ref": "#/$defs/CreatorAffiliation" } + }, + "CreatorAffiliation": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "affiliationIdentifier": { "type": "string" }, + "affiliationIdentifierScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" } + }, + "required": ["name"] + }, + "Titles": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "$ref": "#/$defs/Title" } + }, + "Title": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": { "type": "string" }, + "titleType": { "$ref": "#/$defs/TitleType" }, + "lang": { "type": "string" } + }, + "required": ["title"] + }, + "TitleType": { + "enum": ["AlternativeTitle", 
"Subtitle", "TranslatedTitle", "Other"] + }, + "Publisher": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "publisherIdentifier": { "type": "string" }, + "publisherIdentifierScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" }, + "lang": { "type": "string" } + }, + "required": ["name"] + }, + "PublicationYear": { + "type": "string", + "pattern": "^[0-9]{4}$" + }, + "Subjects": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "$ref": "#/$defs/Subject" } + }, + "Subject": { + "type": "object", + "additionalProperties": false, + "properties": { + "subject": { "type": "string" }, + "subjectScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" }, + "valueUri": { "type": "string", "format": "uri" }, + "classificationCode": { "type": "string" }, + "lang": { "type": "string" } + }, + "required": ["subject"] + }, + "Contributors": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/Contributor" } + }, + "Contributor": { + "type": "object", + "required": ["name", "contributorType"], + "allOf": [{ "$ref": "#/$defs/Creator" }], + "unevaluatedProperties": false, + "properties": { + "contributorType": { "$ref": "#/$defs/ContributorType" } + } + }, + "ContributorType": { + "enum": [ + "ContactPerson", + "DataCollector", + "DataCurator", + "DataManager", + "Distributor", + "Editor", + "HostingInstitution", + "Producer", + "ProjectLeader", + "ProjectManager", + "ProjectMember", + "RegistrationAgency", + "RegistrationAuthority", + "RelatedPerson", + "Researcher", + "ResearchGroup", + "RightsHolder", + "Sponsor", + "Supervisor", + "Translator", + "WorkPackageLeader", + "Other" + ] + }, + "Dates": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "$ref": "#/$defs/Date" } + }, + "Date": { + "type": "object", + "additionalProperties": false, + "properties": { + "date": { "$ref": "#/$defs/DateValue" }, + 
"dateType": { "$ref": "#/$defs/DateType" }, + "dateInformation": { "type": "string" } + }, + "required": ["date", "dateType"] + }, + "DateValue": { + "type": "string", + "anyOf": [ + { "format": "year" }, + { "format": "yearmonth" }, + { "format": "date" }, + { "format": "datetime" }, + { "format": "year-range" }, + { "format": "yearmonth-range" }, + { "format": "date-range" }, + { "format": "datetime-range" } + ] + }, + "DateType": { + "enum": [ + "Accepted", + "Available", + "Copyrighted", + "Collected", + "Coverage", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] + }, + "Language": { + "type": "string" + }, + "ContentTypes": { + "type": "object", + "additionalProperties": false, + "properties": { + "resourceType": { "type": "string" }, + "resourceTypeGeneral": { + "$ref": "#/$defs/ContentTypeGeneral" + } + }, + "required": ["resourceTypeGeneral"] + }, + "ContentTypeGeneral": { + "enum": [ + "Audiovisual", + "Award", + "Book", + "BookChapter", + "Collection", + "ComputationalNotebook", + "ConferencePaper", + "ConferenceProceeding", + "DataPaper", + "Dataset", + "Dissertation", + "Event", + "Image", + "Instrument", + "InteractiveResource", + "Journal", + "JournalArticle", + "Model", + "OutputManagementPlan", + "PeerReview", + "PhysicalObject", + "Preprint", + "Project", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "StudyRegistration", + "Text", + "Workflow", + "Other" + ] + }, + "AlternateIdentifiers": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "$ref": "#/$defs/AlternateIdentifier" } + }, + "AlternateIdentifier": { + "type": "object", + "additionalProperties": false, + "properties": { + "alternateIdentifier": { "type": "string" }, + "alternateIdentifierType": { "type": "string" } + }, + "required": ["alternateIdentifier", "alternateIdentifierType"] + }, + "RelatedIdentifiers": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/RelatedIdentifier" } + 
}, + "RelatedIdentifier": { + "type": "object", + "allOf": [{ "$ref": "#/$defs/RelatedObject" }], + "unevaluatedProperties": false, + "properties": { + "relatedIdentifier": { "type": "string" }, + "relatedIdentifierType": { + "$ref": "#/$defs/RelatedIdentifierType" + } + }, + "required": [ + "relatedIdentifier", + "relatedIdentifierType", + "relationType" + ], + "if": { "$ref": "#/$defs/RelatedObjectIf" }, + "else": { "$ref": "#/$defs/RelatedObjectElse" } + }, + "RelatedObject": { + "type": "object", + "properties": { + "relationType": { "$ref": "#/$defs/RelationType" }, + "relatedMetadataScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" }, + "schemeType": { "type": "string" }, + "resourceTypeGeneral": { "$ref": "#/$defs/ContentTypeGeneral" } + }, + "required": ["relationType"] + }, + "RelationType": { + "enum": [ + "IsCitedBy", + "Cites", + "IsCollectedBy", + "Collects", + "IsSupplementTo", + "IsSupplementedBy", + "IsContinuedBy", + "Continues", + "IsDescribedBy", + "Describes", + "HasMetadata", + "IsMetadataFor", + "HasVersion", + "IsVersionOf", + "IsNewVersionOf", + "IsPartOf", + "IsPreviousVersionOf", + "IsPublishedIn", + "HasPart", + "IsReferencedBy", + "References", + "IsDocumentedBy", + "Documents", + "IsCompiledBy", + "Compiles", + "IsVariantFormOf", + "IsOriginalFormOf", + "IsIdenticalTo", + "IsReviewedBy", + "Reviews", + "IsDerivedFrom", + "IsSourceOf", + "IsRequiredBy", + "Requires", + "IsObsoletedBy", + "Obsoletes", + "HasTranslation", + "IsTranslationOf" + ] + }, + "RelatedIdentifierType": { + "enum": [ + "ARK", + "arXiv", + "bibcode", + "CSTR", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "RRID", + "UPC", + "URL", + "URN", + "w3id" + ] + }, + "RelatedObjectIf": { + "properties": { + "relationType": { "enum": ["HasMetadata", "IsMetadataFor"] } + } + }, + "RelatedObjectElse": { + "properties": { + "relatedMetadataScheme": false, + 
"schemeUri": false, + "schemeType": false + } + }, + "Sizes": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "Formats": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "Version": { + "type": "string" + }, + "RightsList": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/Rights" }, + "uniqueItems": true + }, + "Rights": { + "type": "object", + "additionalProperties": false, + "properties": { + "rights": { "type": "string" }, + "rightsUri": { "type": "string", "format": "uri" }, + "rightsIdentifier": { "type": "string" }, + "rightsIdentifierScheme": { "type": "string" }, + "schemeUri": { "type": "string", "format": "uri" }, + "lang": { "type": "string" } + } + }, + "Descriptions": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/Description" }, + "uniqueItems": true + }, + "Description": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": { "type": "string" }, + "descriptionType": { "$ref": "#/$defs/DescriptionType" }, + "lang": { "type": "string" } + }, + "required": ["description", "descriptionType"] + }, + "DescriptionType": { + "enum": [ + "Abstract", + "Methods", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo", + "Other" + ] + }, + "GeoLocations": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/GeoLocation" }, + "uniqueItems": true + }, + "GeoLocation": { + "type": "object", + "additionalProperties": false, + "properties": { + "geoLocationPlace": { "type": "string" }, + "geoLocationPoint": { "$ref": "#/$defs/GeoLocationPoint" }, + "geoLocationBox": { "$ref": "#/$defs/GeoLocationBox" }, + "geoLocationPolygon": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/GeoLocationPolygonItem" } + } + } + }, + "GeoLocationPoint": { + "type": "object", + "additionalProperties": false, + "properties": { + "pointLongitude": { "$ref": 
"#/$defs/Longitude" }, + "pointLatitude": { "$ref": "#/$defs/Latitude" } + }, + "required": ["pointLongitude", "pointLatitude"] + }, + "Longitude": { + "type": "number", + "maximum": 180, + "minimum": -180 + }, + "Latitude": { + "type": "number", + "maximum": 90, + "minimum": -90 + }, + "GeoLocationBox": { + "type": "object", + "additionalProperties": false, + "properties": { + "westBoundLongitude": { "$ref": "#/$defs/Longitude" }, + "eastBoundLongitude": { "$ref": "#/$defs/Longitude" }, + "southBoundLatitude": { "$ref": "#/$defs/Latitude" }, + "northBoundLatitude": { "$ref": "#/$defs/Latitude" } + }, + "required": [ + "westBoundLongitude", + "eastBoundLongitude", + "southBoundLatitude", + "northBoundLatitude" + ] + }, + "GeoLocationPolygonItem": { + "type": "object", + "additionalProperties": false, + "properties": { + "polygonPoint": { + "$ref": "#/$defs/GeoLocationPoint" + }, + "inPolygonPoint": { + "$ref": "#/$defs/GeoLocationPoint" + } + } + }, + "FundingReferences": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/FundingReference" }, + "uniqueItems": true + }, + "FundingReference": { + "type": "object", + "additionalProperties": false, + "properties": { + "funderName": { "type": "string" }, + "funderIdentifier": { "type": "string" }, + "funderIdentifierType": { + "$ref": "#/$defs/FunderIdentifierType" + }, + "awardNumber": { "type": "string" }, + "awardUri": { "type": "string", "format": "uri" }, + "awardTitle": { "type": "string" } + }, + "required": ["funderName"] + }, + "FunderIdentifierType": { + "enum": ["ISNI", "GRID", "Crossref Funder ID", "ROR", "Other"] + }, + "RelatedItems": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/RelatedItem" }, + "uniqueItems": true + }, + "RelatedItem": { + "type": "object", + "allOf": [{ "$ref": "#/$defs/RelatedObject" }], + "unevaluatedProperties": false, + "properties": { + "relatedItemIdentifier": { + "$ref": "#/$defs/RelatedItemIdentifier" + }, + "relatedItemType": { "$ref": 
"#/$defs/ContentTypeGeneral" }, + "creators": { "$ref": "#/$defs/Creators" }, + "contributors": { "$ref": "#/$defs/Contributors" }, + "titles": { "$ref": "#/$defs/Titles" }, + "publicationYear": { "$ref": "#/$defs/PublicationYear" }, + "volume": { "type": "string" }, + "issue": { "type": "string" }, + "firstPage": { "type": "string" }, + "lastPage": { "type": "string" }, + "edition": { "type": "string" }, + "publisher": { "type": "string" }, + "number": { "type": "string" }, + "numberType": { "$ref": "#/$defs/NumberType" } + }, + "required": ["titles", "relatedItemType", "relationType"], + "if": { "$ref": "#/$defs/RelatedObjectIf" }, + "else": { "$ref": "#/$defs/RelatedObjectElse" } + }, + "RelatedItemIdentifier": { + "type": "object", + "additionalProperties": false, + "properties": { + "relatedItemIdentifier": { "type": "string" }, + "relatedItemIdentifierType": { + "$ref": "#/$defs/RelatedIdentifierType" + } + }, + "required": ["relatedItemIdentifier", "relatedItemIdentifierType"] + }, + "NumberType": { + "enum": ["Article", "Chapter", "Report", "Other"] + } + } +} diff --git a/metadata/fairspec_metadata/profiles/file-dialect.json b/metadata/fairspec_metadata/profiles/file-dialect.json new file mode 100644 index 0000000..4db1afd --- /dev/null +++ b/metadata/fairspec_metadata/profiles/file-dialect.json @@ -0,0 +1,216 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{ "$ref": "#/$defs/FileDialect" }], + "$defs": { + "FileDialect": { + "type": "object", + "properties": { + "$schema": { "$ref": "#/$defs/Profile" }, + "format": { "$ref": "#/$defs/Format" }, + "title": { "$ref": "#/$defs/Title" }, + "description": { "$ref": "#/$defs/Description" } + }, + "allOf": [ + { + "if": { "properties": { "format": { "const": "csv" } } }, + "then": { "$ref": "#/$defs/CsvDialect" } + }, + { + "if": { "properties": { "format": { "const": "tsv" } } }, + "then": { "$ref": "#/$defs/TsvDialect" } + }, + { + "if": { "properties": { "format": { "const": 
"json" } } }, + "then": { "$ref": "#/$defs/JsonDialect" } + }, + { + "if": { "properties": { "format": { "const": "jsonl" } } }, + "then": { "$ref": "#/$defs/JsonlDialect" } + }, + { + "if": { "properties": { "format": { "const": "xlsx" } } }, + "then": { "$ref": "#/$defs/XlsxDialect" } + }, + { + "if": { "properties": { "format": { "const": "ods" } } }, + "then": { "$ref": "#/$defs/OdsDialect" } + }, + { + "if": { "properties": { "format": { "const": "parquet" } } }, + "then": { "$ref": "#/$defs/ParquetDialect" } + }, + { + "if": { "properties": { "format": { "const": "arrow" } } }, + "then": { "$ref": "#/$defs/ArrowDialect" } + }, + { + "if": { "properties": { "format": { "const": "sqlite" } } }, + "then": { "$ref": "#/$defs/SqliteDialect" } + } + ] + }, + "CsvDialect": { + "properties": { + "format": { "const": "csv" }, + "delimiter": { "$ref": "#/$defs/Delimiter" }, + "lineTerminator": { "$ref": "#/$defs/LineTerminator" }, + "quoteChar": { "$ref": "#/$defs/QuoteChar" }, + "nullSequence": { "$ref": "#/$defs/NullSequence" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { "$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "TsvDialect": { + "properties": { + "format": { "const": "tsv" }, + "lineTerminator": { "$ref": "#/$defs/LineTerminator" }, + "nullSequence": { "$ref": "#/$defs/NullSequence" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { "$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "JsonDialect": { + "properties": { + "format": { "const": "json" }, + "jsonPointer": { "$ref": "#/$defs/JsonPointer" }, + "rowType": { "$ref": "#/$defs/RowType" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { 
"$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "JsonlDialect": { + "properties": { + "format": { "const": "jsonl" }, + "rowType": { "$ref": "#/$defs/RowType" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { "$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "XlsxDialect": { + "properties": { + "format": { "const": "xlsx" }, + "sheetName": { "$ref": "#/$defs/SheetName" }, + "sheetNumber": { "$ref": "#/$defs/SheetNumber" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { "$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "OdsDialect": { + "properties": { + "format": { "const": "ods" }, + "sheetName": { "$ref": "#/$defs/SheetName" }, + "sheetNumber": { "$ref": "#/$defs/SheetNumber" }, + "headerRows": { "$ref": "#/$defs/HeaderRows" }, + "headerJoin": { "$ref": "#/$defs/HeaderJoin" }, + "commentRows": { "$ref": "#/$defs/CommentRows" }, + "commentPrefix": { "$ref": "#/$defs/CommentPrefix" }, + "columnNames": { "$ref": "#/$defs/ColumnNames" } + } + }, + "ParquetDialect": { + "properties": { + "format": { "const": "parquet" } + } + }, + "ArrowDialect": { + "properties": { + "format": { "const": "arrow" } + } + }, + "SqliteDialect": { + "properties": { + "format": { "const": "sqlite" }, + "tableName": { "$ref": "#/$defs/TableName" } + } + }, + "Profile": { + "$ref": "#/$defs/ExternalPath" + }, + "ExternalPath": { + "type": "string", + "pattern": "^https?://" + }, + "Format": { + "type": "string" + }, + "Title": { + "type": "string" + }, + "Description": { + "type": "string" + }, + 
"Delimiter": { + "type": "string", + "maxLength": 1 + }, + "LineTerminator": { + "type": "string" + }, + "QuoteChar": { + "type": "string" + }, + "NullSequence": { + "type": "string" + }, + "Header": { + "type": "boolean" + }, + "HeaderRows": { + "oneOf": [ + { "const": false }, + { + "type": "array", + "items": { "type": "integer", "minimum": 1 }, + "minItems": 1 + } + ] + }, + "HeaderJoin": { + "type": "string" + }, + "CommentRows": { + "type": "array", + "items": { "type": "integer", "minimum": 1 }, + "minItems": 1 + }, + "CommentPrefix": { + "type": "string" + }, + "ColumnNames": { + "type": "array", + "items": { "type": "string" }, + "minItems": 1 + }, + "JsonPointer": { + "type": "string", + "format": "json-pointer" + }, + "RowType": { + "enum": ["array", "object"] + }, + "SheetNumber": { + "type": "integer" + }, + "SheetName": { + "type": "string" + }, + "TableName": { + "type": "string" + } + } +} diff --git a/metadata/fairspec_metadata/profiles/table-schema.json b/metadata/fairspec_metadata/profiles/table-schema.json new file mode 100644 index 0000000..82732ea --- /dev/null +++ b/metadata/fairspec_metadata/profiles/table-schema.json @@ -0,0 +1,715 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "allOf": [{ "$ref": "#/$defs/TableSchema" }], + "$defs": { + "TableSchema": { + "type": "object", + "allOf": [{ "$ref": "#/$defs/JsonSchema" }], + "properties": { + "$schema": { "$ref": "#/$defs/Profile" }, + "title": { "$ref": "#/$defs/Title" }, + "description": { "$ref": "#/$defs/Description" }, + "properties": { "$ref": "#/$defs/Properties" }, + "required": { "$ref": "#/$defs/Required" }, + "allRequired": { "$ref": "#/$defs/AllRequired" }, + "missingValues": { "$ref": "#/$defs/MissingValues" }, + "primaryKey": { "$ref": "#/$defs/PrimaryKey" }, + "uniqueKeys": { "$ref": "#/$defs/UniqueKeys" }, + "foreignKeys": { "$ref": "#/$defs/ForeignKeys" } + } + }, + "JsonSchema": { + "$ref": "https://json-schema.org/draft/2020-12/schema" + }, + "Profile": 
{ + "$ref": "#/$defs/ExternalPath" + }, + "Title": { + "type": "string" + }, + "Description": { + "type": "string" + }, + "Required": { + "type": "array", + "items": { "type": "string" } + }, + "AllRequired": { + "type": "boolean" + }, + "Properties": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/ColumnProperty" + } + }, + "ColumnProperty": { + "type": "object", + "properties": { + "type": {}, + "title": { "$ref": "#/$defs/Title" }, + "description": { "$ref": "#/$defs/Description" }, + "rdfType": { "$ref": "#/$defs/RdfType" }, + "enum": { "$ref": "#/$defs/UnknownEnum" }, + "const": { "$ref": "#/$defs/UnknownConst" }, + "default": { "$ref": "#/$defs/UnknownDefault" }, + "examples": { "$ref": "#/$defs/UnknownExamples" }, + "missingValues": { "$ref": "#/$defs/MissingValues" } + }, + "allOf": [ + { + "if": { "properties": { "type": { "$ref": "#/$defs/BooleanType" } } }, + "then": { "$ref": "#/$defs/BooleanColumnProperty" } + }, + { + "if": { "properties": { "type": { "$ref": "#/$defs/IntegerType" } } }, + "then": { "$ref": "#/$defs/IntegerColumnPropertyGroup" } + }, + { + "if": { "properties": { "type": { "$ref": "#/$defs/NumberType" } } }, + "then": { "$ref": "#/$defs/NumberColumnProperty" } + }, + { + "if": { "properties": { "type": { "$ref": "#/$defs/StringType" } } }, + "then": { "$ref": "#/$defs/StringColumnPropertyGroup" } + }, + { + "if": { "properties": { "type": { "$ref": "#/$defs/ArrayType" } } }, + "then": { "$ref": "#/$defs/ArrayColumnProperty" } + }, + { + "if": { "properties": { "type": { "$ref": "#/$defs/ObjectType" } } }, + "then": { "$ref": "#/$defs/ObjectColumnPropertyGroup" } + } + ] + }, + "IntegerColumnPropertyGroup": { + "allOf": [ + { + "if": { "not": { "required": ["format"] } }, + "then": { "$ref": "#/$defs/IntegerColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "categorical" } } }, + "then": { "$ref": "#/$defs/IntegerCategoricalColumnProperty" } + } + ] + }, + "StringColumnPropertyGroup": { + 
"properties": { + "format": { + "enum": [ + "categorical", + "decimal", + "list", + "url", + "email", + "date", + "time", + "date-time", + "duration", + "wkt", + "wkb", + "hex", + "base64" + ] + } + }, + "allOf": [ + { + "if": { "not": { "required": ["format"] } }, + "then": { "$ref": "#/$defs/StringColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "categorical" } } }, + "then": { "$ref": "#/$defs/StringCategoricalColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "decimal" } } }, + "then": { "$ref": "#/$defs/DecimalColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "list" } } }, + "then": { "$ref": "#/$defs/ListColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "hex" } } }, + "then": { "$ref": "#/$defs/HexColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "base64" } } }, + "then": { "$ref": "#/$defs/Base64ColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "email" } } }, + "then": { "$ref": "#/$defs/EmailColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "url" } } }, + "then": { "$ref": "#/$defs/UrlColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "date-time" } } }, + "then": { "$ref": "#/$defs/DateTimeColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "date" } } }, + "then": { "$ref": "#/$defs/DateColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "time" } } }, + "then": { "$ref": "#/$defs/TimeColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "duration" } } }, + "then": { "$ref": "#/$defs/DurationColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "wkt" } } }, + "then": { "$ref": "#/$defs/WktColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "wkb" } } }, + "then": { "$ref": "#/$defs/WkbColumnProperty" } + } + ] + }, + "ObjectColumnPropertyGroup": { + "properties": { + 
"format": { + "enum": ["geojson", "topojson"] + } + }, + "allOf": [ + { + "if": { "not": { "required": ["format"] } }, + "then": { "$ref": "#/$defs/ObjectColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "geojson" } } }, + "then": { "$ref": "#/$defs/GeojsonColumnProperty" } + }, + { + "if": { "properties": { "format": { "const": "topojson" } } }, + "then": { "$ref": "#/$defs/TopojsonColumnProperty" } + } + ] + }, + "BooleanColumnProperty": { + "properties": { + "type": { "$ref": "#/$defs/BooleanType" }, + "enum": { "$ref": "#/$defs/BooleanEnum" }, + "const": { "$ref": "#/$defs/BooleanConst" }, + "default": { "$ref": "#/$defs/BooleanDefault" }, + "examples": { "$ref": "#/$defs/BooleanExamples" }, + "missingValues": { "$ref": "#/$defs/MissingValues" }, + "trueValues": { "$ref": "#/$defs/TrueValues" }, + "falseValues": { "$ref": "#/$defs/FalseValues" } + } + }, + "BaseIntegerColumnProperty": { + "properties": { + "type": { "$ref": "#/$defs/IntegerType" }, + "enum": { "$ref": "#/$defs/IntegerEnum" }, + "const": { "$ref": "#/$defs/IntegerConst" }, + "default": { "$ref": "#/$defs/IntegerDefault" }, + "examples": { "$ref": "#/$defs/IntegerExamples" }, + "missingValues": { "$ref": "#/$defs/MissingValues" }, + "minimum": { "$ref": "#/$defs/IntegerMinimum" }, + "maximum": { "$ref": "#/$defs/IntegerMaximum" }, + "exclusiveMinimum": { "$ref": "#/$defs/IntegerExclusiveMinimum" }, + "exclusiveMaximum": { "$ref": "#/$defs/IntegerExclusiveMaximum" }, + "multipleOf": { "$ref": "#/$defs/IntegerMultipleOf" }, + "groupChar": { "$ref": "#/$defs/GroupChar" }, + "withText": { "type": "boolean" }, + "categories": { "$ref": "#/$defs/IntegerCategories" }, + "categoriesOrdered": { "type": "boolean" } + } + }, + "IntegerColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseIntegerColumnProperty" }] + }, + "IntegerCategoricalColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseIntegerColumnProperty" }], + "properties": { + "format": { "const": "categorical" }, + 
"categories": { "$ref": "#/$defs/IntegerCategories" }, + "withOrder": { "type": "boolean" } + } + }, + "NumberColumnProperty": { + "properties": { + "type": { "$ref": "#/$defs/NumberType" }, + "enum": { "$ref": "#/$defs/NumberEnum" }, + "const": { "$ref": "#/$defs/NumberConst" }, + "default": { "$ref": "#/$defs/NumberDefault" }, + "examples": { "$ref": "#/$defs/NumberExamples" }, + "missingValues": { "$ref": "#/$defs/MissingValues" }, + "minimum": { "$ref": "#/$defs/NumberMinimum" }, + "maximum": { "$ref": "#/$defs/NumberMaximum" }, + "exclusiveMinimum": { "$ref": "#/$defs/NumberExclusiveMinimum" }, + "exclusiveMaximum": { "$ref": "#/$defs/NumberExclusiveMaximum" }, + "multipleOf": { "$ref": "#/$defs/NumberMultipleOf" }, + "decimalChar": { "$ref": "#/$defs/DecimalChar" }, + "groupChar": { "$ref": "#/$defs/GroupChar" }, + "withText": { "type": "boolean" } + } + }, + "BaseStringColumnProperty": { + "properties": { + "type": { "$ref": "#/$defs/StringType" }, + "enum": { "$ref": "#/$defs/StringEnum" }, + "const": { "$ref": "#/$defs/StringConst" }, + "default": { "$ref": "#/$defs/StringDefault" }, + "examples": { "$ref": "#/$defs/StringExamples" }, + "missingValues": { "$ref": "#/$defs/StringMissingValues" }, + "minLength": { "$ref": "#/$defs/MinLength" }, + "maxLength": { "$ref": "#/$defs/MaxLength" }, + "pattern": { "$ref": "#/$defs/Pattern" } + } + }, + "StringColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "categories": { "$ref": "#/$defs/StringCategories" }, + "categoriesOrdered": { "type": "boolean" } + } + }, + "StringCategoricalColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "categorical" }, + "categories": { "$ref": "#/$defs/StringCategories" }, + "withOrder": { "type": "boolean" } + } + }, + "DecimalColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "decimal" }, + 
"minimum": { "$ref": "#/$defs/NumberMinimum" }, + "maximum": { "$ref": "#/$defs/NumberMaximum" }, + "exclusiveMinimum": { "$ref": "#/$defs/NumberExclusiveMinimum" }, + "exclusiveMaximum": { "$ref": "#/$defs/NumberExclusiveMaximum" }, + "multipleOf": { "$ref": "#/$defs/NumberMultipleOf" }, + "decimalChar": { "$ref": "#/$defs/DecimalChar" }, + "groupChar": { "$ref": "#/$defs/GroupChar" }, + "withText": { "type": "boolean" } + } + }, + "ListColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "list" }, + "itemType": { "$ref": "#/$defs/ListItemType" }, + "delimiter": { "$ref": "#/$defs/ListDelimiter" }, + "minItems": { "$ref": "#/$defs/MinItems" }, + "maxItems": { "$ref": "#/$defs/MaxItems" } + } + }, + "Base64ColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "base64" } + } + }, + "HexColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "hex" } + } + }, + "EmailColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "email" } + } + }, + "UrlColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "url" } + } + }, + "DateTimeColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "date-time" }, + "temporalFormat": { "$ref": "#/$defs/TemporalFormat" } + } + }, + "DateColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "date" }, + "temporalFormat": { "$ref": "#/$defs/TemporalFormat" } + } + }, + "TimeColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "time" }, + "temporalFormat": { "$ref": "#/$defs/TemporalFormat" } + } + }, + 
"DurationColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "duration" } + } + }, + "WktColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "wkt" } + } + }, + "WkbColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseStringColumnProperty" }], + "properties": { + "format": { "const": "wkb" } + } + }, + "ArrayColumnProperty": { + "allOf": [{ "$ref": "#/$defs/JsonSchema" }], + "properties": { + "type": { "$ref": "#/$defs/ArrayType" }, + "enum": { "$ref": "#/$defs/ArrayEnum" }, + "const": { "$ref": "#/$defs/ArrayConst" }, + "default": { "$ref": "#/$defs/ArrayDefault" }, + "examples": { "$ref": "#/$defs/ArrayExamples" }, + "missingValues": { "$ref": "#/$defs/StringMissingValues" } + } + }, + "BaseObjectColumnProperty": { + "allOf": [{ "$ref": "#/$defs/JsonSchema" }], + "properties": { + "type": { "$ref": "#/$defs/ObjectType" }, + "enum": { "$ref": "#/$defs/ObjectEnum" }, + "const": { "$ref": "#/$defs/ObjectConst" }, + "default": { "$ref": "#/$defs/ObjectDefault" }, + "examples": { "$ref": "#/$defs/ObjectExamples" }, + "missingValues": { "$ref": "#/$defs/StringMissingValues" } + } + }, + "ObjectColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseObjectColumnProperty" }] + }, + "GeojsonColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseObjectColumnProperty" }], + "properties": { + "format": { "const": "geojson" } + } + }, + "TopojsonColumnProperty": { + "allOf": [{ "$ref": "#/$defs/BaseObjectColumnProperty" }], + "properties": { + "format": { "const": "topojson" } + } + }, + "ExternalPath": { + "type": "string", + "pattern": "^https?://" + }, + "RdfType": { + "type": "string", + "format": "uri" + }, + "Pattern": { + "type": "string", + "format": "regex" + }, + "MinLength": { + "type": "integer", + "minimum": 0 + }, + "MaxLength": { + "type": "integer", + "minimum": 0 + }, + "MinItems": { + "type": "integer", + "minimum": 0 + }, + 
"MaxItems": { + "type": "integer", + "minimum": 0 + }, + "TrueValues": { + "type": "array", + "items": { "type": "string" } + }, + "FalseValues": { + "type": "array", + "items": { "type": "string" } + }, + "IntegerMinimum": { + "type": "integer" + }, + "IntegerMaximum": { + "type": "integer" + }, + "IntegerExclusiveMinimum": { + "type": "integer" + }, + "IntegerExclusiveMaximum": { + "type": "integer" + }, + "IntegerMultipleOf": { + "type": "integer", + "minimum": 1 + }, + "NumberMinimum": { + "type": "number" + }, + "NumberMaximum": { + "type": "number" + }, + "NumberExclusiveMinimum": { + "type": "number" + }, + "NumberExclusiveMaximum": { + "type": "number" + }, + "NumberMultipleOf": { + "type": "number", + "exclusiveMinimum": 0 + }, + "DecimalChar": { + "type": "string", + "maxLength": 1 + }, + "GroupChar": { + "type": "string", + "maxLength": 1 + }, + "TemporalFormat": { + "type": "string" + }, + "ListDelimiter": { + "type": "string", + "maxLength": 1 + }, + "ListItemType": { + "enum": [ + "string", + "integer", + "number", + "boolean", + "date-time", + "date", + "time" + ] + }, + "PrimaryKey": { + "type": "array", + "items": { "type": "string" }, + "minItems": 1 + }, + "UniqueKeys": { + "type": "array", + "items": { "$ref": "#/$defs/UniqueKey" }, + "minItems": 1 + }, + "UniqueKey": { + "type": "array", + "items": { "type": "string" }, + "minItems": 1 + }, + "ForeignKeys": { + "type": "array", + "items": { "$ref": "#/$defs/ForeignKey" }, + "minItems": 1 + }, + "ForeignKey": { + "type": "object", + "required": ["columns", "reference"], + "properties": { + "columns": { "$ref": "#/$defs/ForeignKeyColumns" }, + "reference": { "$ref": "#/$defs/ForeignKeyReference" } + } + }, + "ForeignKeyReference": { + "type": "object", + "required": ["columns"], + "properties": { + "resource": { "type": "string" }, + "columns": { "$ref": "#/$defs/ForeignKeyColumns" } + } + }, + "ForeignKeyColumns": { + "type": "array", + "items": { "type": "string" } + }, + "BooleanType": { + 
"enum": ["boolean", ["boolean", "null"], ["null", "boolean"]] + }, + "IntegerType": { + "enum": ["integer", ["integer", "null"], ["null", "integer"]] + }, + "NumberType": { + "enum": ["number", ["number", "null"], ["null", "number"]] + }, + "StringType": { + "const": ["string", ["string", "null"], ["null", "string"]] + }, + "ArrayType": { + "const": ["array", ["array", "null"], ["null", "array"]] + }, + "ObjectType": { + "const": ["object", ["object", "null"], ["null", "object"]] + }, + "MissingValues": { + "type": "array", + "items": { + "oneOf": [ + { "type": ["string", "integer"] }, + { + "type": "object", + "properties": { + "value": { "type": ["string", "integer"] }, + "label": { "type": "string" } + } + } + ] + } + }, + "StringMissingValues": { + "type": "array", + "items": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "value": { "type": "string" }, + "label": { "type": "string" } + } + } + ] + } + }, + "BooleanExamples": { + "type": "array", + "items": { "type": "boolean" } + }, + "IntegerExamples": { + "type": "array", + "items": { "type": "integer" } + }, + "NumberExamples": { + "type": "array", + "items": { "type": "number" } + }, + "StringExamples": { + "type": "array", + "items": { "type": "string" } + }, + "ArrayExamples": { + "type": "array", + "items": { "type": "array" } + }, + "ObjectExamples": { + "type": "array", + "items": { "type": "object" } + }, + "UnknownExamples": { + "type": "array" + }, + "BooleanEnum": { + "type": "array", + "items": { "type": "boolean" } + }, + "IntegerEnum": { + "type": "array", + "items": { "type": "integer" } + }, + "NumberEnum": { + "type": "array", + "items": { "type": "number" } + }, + "StringEnum": { + "type": "array", + "items": { "type": "string" } + }, + "ArrayEnum": { + "type": "array", + "items": { "type": "array" } + }, + "ObjectEnum": { + "type": "array", + "items": { "type": "object" } + }, + "UnknownEnum": { + "type": "array" + }, + "BooleanConst": { + "type": 
"boolean" + }, + "IntegerConst": { + "type": "integer" + }, + "NumberConst": { + "type": "number" + }, + "StringConst": { + "type": "string" + }, + "ArrayConst": { + "type": "array" + }, + "ObjectConst": { + "type": "object" + }, + "UnknownConst": {}, + "BooleanDefault": { + "type": "boolean" + }, + "IntegerDefault": { + "type": "integer" + }, + "NumberDefault": { + "type": "number" + }, + "StringDefault": { + "type": "string" + }, + "ArrayDefault": { + "type": "array" + }, + "ObjectDefault": { + "type": "object" + }, + "UnknownDefault": {}, + "IntegerCategories": { + "type": "array", + "items": { + "oneOf": [ + { "type": "integer" }, + { + "type": "object", + "properties": { + "value": { "type": "integer" }, + "label": { "type": "string" } + } + } + ] + } + }, + "StringCategories": { + "type": "array", + "items": { + "oneOf": [ + { "type": "string" }, + { + "type": "object", + "properties": { + "value": { "type": "string" }, + "label": { "type": "string" } + } + } + ] + } + } + } +} diff --git a/metadata/fairspec_metadata/py.typed b/metadata/fairspec_metadata/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/metadata/fairspec_metadata/settings.py b/metadata/fairspec_metadata/settings.py new file mode 100644 index 0000000..452c51e --- /dev/null +++ b/metadata/fairspec_metadata/settings.py @@ -0,0 +1 @@ +FAIRSPEC_VERSION = "0.5.0" diff --git a/metadata/pyproject.toml b/metadata/pyproject.toml index 058b561..f56d588 100644 --- a/metadata/pyproject.toml +++ b/metadata/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = [ + "jsonschema>=4.23", "pydantic>=2.12", "typing-extensions>=4.15", ] diff --git a/pyproject.toml b/pyproject.toml index d99e429..0aa98ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,6 @@ requires-python = ">=3.12" [tool.ruff] lint.ignore = ["E501"] -line-length = 90 extend-exclude = [ "__pycache__", ".eggs", @@ -20,31 +19,49 @@ extend-exclude = 
[ commit_parser = "conventional" version_toml = [ "pyproject.toml:project.version", + "dataset/pyproject.toml:project.version", + "fairspec/pyproject.toml:project.version", + "library/pyproject.toml:project.version", "metadata/pyproject.toml:project.version", + "table/pyproject.toml:project.version", + "terminal/pyproject.toml:project.version", ] [tool.taskipy.tasks] format = "ruff format" +install = "uv sync --all-packages" lint = "ruff check" spec = "pytest" +start = "pnpm --dir website install && pnpm --dir website start" test = "task lint && task type && task spec" type = "ty check" +[tool.pytest.ini_options] +addopts = "--import-mode=importlib" +python_files = "*_spec.py" + [tool.ty.rules] index-out-of-bounds = "ignore" [tool.uv] package = false dev-dependencies = [ - "python-semantic-release", - "pytest", - "ruff", - "taskipy", - "ty", + "pre-commit==4.2.0", + "python-semantic-release==10.5.3", + "pytest==9.0.2", + "ruff==0.15.0", + "taskipy==1.14.1", + "ty==0.0.15", + "pytest-recording==0.13.4", ] [tool.uv.workspace] -members = ["metadata"] +members = ["dataset", "fairspec", "library", "metadata", "table", "terminal"] [tool.uv.sources] +fairspec = { workspace = true } +fairspec-dataset = { workspace = true } +fairspec-library = { workspace = true } fairspec-metadata = { workspace = true } +fairspec-table = { workspace = true } +fairspec-terminal = { workspace = true } diff --git a/table/README.md b/table/README.md new file mode 100644 index 0000000..ff2c788 --- /dev/null +++ b/table/README.md @@ -0,0 +1,3 @@ +# fairspec-table + +Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). 
diff --git a/table/fairspec_table/__init__.py b/table/fairspec_table/__init__.py new file mode 100644 index 0000000..0875edb --- /dev/null +++ b/table/fairspec_table/__init__.py @@ -0,0 +1,85 @@ +from .actions.table.denormalize import denormalize_table +from .actions.table.inspect import inspect_table +from .actions.table.normalize import normalize_table +from .actions.table.query import query_table +from .actions.table_schema.infer import ( + infer_table_schema_from_sample, + infer_table_schema_from_table, +) +from .models.column import ColumnMapping, DenormalizeColumnOptions, PolarsColumn +from .models.data import DataRecord, DataRow +from .models.frame import Frame +from .models.schema import ( + InferTableSchemaOptions, + PolarsSchema, + SchemaMapping, + TableSchemaOptions, +) +from .models.table import Table +from .plugin import TablePlugin +from .plugins.arrow import ArrowPlugin, load_arrow_table, save_arrow_table +from .plugins.inline import InlinePlugin, load_inline_table +from .plugins.csv import ( + CsvPlugin, + infer_csv_file_dialect, + load_csv_table, + save_csv_table, +) +from .plugins.json import ( + JsonPlugin, + infer_json_file_dialect, + load_json_table, + save_json_table, +) +from .plugins.parquet import ParquetPlugin, load_parquet_table, save_parquet_table +from .plugins.sqlite import SqlitePlugin, load_sqlite_table, save_sqlite_table +from .plugins.xlsx import ( + XlsxPlugin, + infer_xlsx_file_dialect, + load_xlsx_table, + save_xlsx_table, +) + +__all__ = [ + "ArrowPlugin", + "CsvPlugin", + "ColumnMapping", + "DataRecord", + "DataRow", + "DenormalizeColumnOptions", + "Frame", + "InferTableSchemaOptions", + "InlinePlugin", + "JsonPlugin", + "ParquetPlugin", + "PolarsColumn", + "PolarsSchema", + "SchemaMapping", + "SqlitePlugin", + "Table", + "TablePlugin", + "TableSchemaOptions", + "denormalize_table", + "infer_csv_file_dialect", + "infer_table_schema_from_sample", + "infer_table_schema_from_table", + "infer_json_file_dialect", + "inspect_table", 
+ "load_arrow_table", + "load_csv_table", + "load_inline_table", + "load_json_table", + "load_parquet_table", + "load_sqlite_table", + "normalize_table", + "query_table", + "save_arrow_table", + "save_csv_table", + "save_json_table", + "save_parquet_table", + "save_sqlite_table", + "XlsxPlugin", + "infer_xlsx_file_dialect", + "load_xlsx_table", + "save_xlsx_table", +] diff --git a/table/fairspec_table/actions/__init__.py b/table/fairspec_table/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/column/__init__.py b/table/fairspec_table/actions/column/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/column/checks/__init__.py b/table/fairspec_table/actions/column/checks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/column/checks/const.py b/table/fairspec_table/actions/column/checks/const.py new file mode 100644 index 0000000..e1e0745 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/const.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellConstError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellConstError + + +def check_cell_const(column: Column, mapping: CellMapping) -> ColumnCheck | None: + const_constraint = column.property.const + if const_constraint is None: + return None + + primitive_const_constraint = ( + json.dumps(const_constraint) + if isinstance(const_constraint, (dict, list)) + else const_constraint + ) + + is_error_expr = mapping.target.eq(pl.lit(primitive_const_constraint)).not_() + + error_template = CellConstError( + type="cell/const", + columnName=column.name, + rowNumber=0, + cell="", + **{"const": str(primitive_const_constraint)}, + ) + + return 
ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/const_spec.py b/table/fairspec_table/actions/column/checks/const_spec.py new file mode 100644 index 0000000..6c02c6c --- /dev/null +++ b/table/fairspec_table/actions/column/checks/const_spec.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + BooleanColumn, + BooleanColumnProperty, +) +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .const import check_cell_const + + +class TestCheckCellConst: + def test_returns_none_when_no_const(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_const(column, mapping) + + assert result is None + + def test_string_const_match(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(const="active"), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["active", "active", "active"], + "target": ["active", "active", "active"], + } + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/const" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_string_const_mismatch(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(const="active"), + ) + mapping = CellMapping(source=pl.col("source"), 
target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["active", "inactive", "active", "pending"], + "target": ["active", "inactive", "active", "pending"], + } + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_null_values_not_flagged(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(const="active"), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["active", None, "active", None], + "target": ["active", None, "active", None], + } + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_case_sensitivity(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(const="active"), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["Active", "ACTIVE", "active"], + "target": ["Active", "ACTIVE", "active"], + } + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_integer_const(self): + column = IntegerColumn( + name="priority", + type="integer", + property=IntegerColumnProperty(const=1), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame({"source": [1, 1, 2], "target": [1, 1, 2]}).lazy() + + result = check_cell_const(column, mapping) + + 
assert result is not None + assert result.error_template.model_dump()["const"] == "1" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_number_const(self): + column = NumberColumn( + name="rate", + type="number", + property=NumberColumnProperty(const=1.5), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [1.5, 1.5, 2.5], "target": [1.5, 1.5, 2.5]} + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + assert result.error_template.model_dump()["const"] == "1.5" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_boolean_const(self): + column = BooleanColumn( + name="enabled", + type="boolean", + property=BooleanColumnProperty(const=True), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [True, True, False], "target": [True, True, False]} + ).lazy() + + result = check_cell_const(column, mapping) + + assert result is not None + assert result.error_template.model_dump()["const"] == "True" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 diff --git a/table/fairspec_table/actions/column/checks/enum.py b/table/fairspec_table/actions/column/checks/enum.py new file mode 100644 index 0000000..a50018d --- /dev/null +++ b/table/fairspec_table/actions/column/checks/enum.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellEnumError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + 
+ +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellEnumError + + +def check_cell_enum(column: Column, mapping: CellMapping) -> ColumnCheck | None: + enum_constraint = column.property.enum + if enum_constraint is None: + return None + + primitive_enum_constraint = [ + json.dumps(item) if isinstance(item, (dict, list)) else item + for item in enum_constraint + ] + + is_error_expr = mapping.target.is_in(primitive_enum_constraint).not_() + + error_template = CellEnumError( + type="cell/enum", + columnName=column.name, + enum=[str(item) for item in enum_constraint], + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/enum_spec.py b/table/fairspec_table/actions/column/checks/enum_spec.py new file mode 100644 index 0000000..730a85c --- /dev/null +++ b/table/fairspec_table/actions/column/checks/enum_spec.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .enum import check_cell_enum + + +class TestCheckCellEnum: + def test_returns_none_when_no_enum(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_enum(column, mapping) + + assert result is None + + def test_string_values_in_enum(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty( + type="string", enum=["pending", "approved", "rejected"] + ), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["pending", 
"approved", "rejected", "pending"], + "target": ["pending", "approved", "rejected", "pending"], + } + ).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_not_in_enum(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty( + type="string", enum=["pending", "approved", "rejected"] + ), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["pending", "approved", "unknown", "cancelled", "rejected"], + "target": ["pending", "approved", "unknown", "cancelled", "rejected"], + } + ).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/enum" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_null_values_not_flagged(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty( + type="string", enum=["pending", "approved", "rejected"] + ), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["pending", None, "approved", None], + "target": ["pending", None, "approved", None], + } + ).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_case_sensitivity(self): + column = StringColumn( + name="status", + type="string", + property=StringColumnProperty( + type="string", enum=["pending", "approved", "rejected"] + ), + ) + mapping = 
CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["Pending", "APPROVED", "rejected"], + "target": ["Pending", "APPROVED", "rejected"], + } + ).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_integer_enum(self): + column = IntegerColumn( + name="priority", + type="integer", + property=IntegerColumnProperty(enum=[1, 2, 3]), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame({"source": [1, 2, 5], "target": [1, 2, 5]}).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + assert result.error_template.enum == ["1", "2", "3"] + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_number_enum(self): + column = NumberColumn( + name="rating", + type="number", + property=NumberColumnProperty(enum=[1.5, 2.5, 3.5]), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [1.5, 2.5, 4.5], "target": [1.5, 2.5, 4.5]} + ).lazy() + + result = check_cell_enum(column, mapping) + + assert result is not None + assert result.error_template.enum == ["1.5", "2.5", "3.5"] + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 diff --git a/table/fairspec_table/actions/column/checks/max_items.py b/table/fairspec_table/actions/column/checks/max_items.py new file mode 100644 index 0000000..5fc66c3 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/max_items.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from dataclasses 
import dataclass + +import polars as pl +from fairspec_metadata import CellMaxItemsError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMaxItemsError + + +def check_cell_max_items(column: Column, mapping: CellMapping) -> ColumnCheck | None: + if column.type != "list": + return None + + max_items = getattr(column.property, "maxItems", None) + if max_items is None: + return None + + is_error_expr = mapping.target.list.len().gt(max_items) + + error_template = CellMaxItemsError( + type="cell/maxItems", + columnName=column.name, + maxItems=max_items, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/max_items_spec.py b/table/fairspec_table/actions/column/checks/max_items_spec.py new file mode 100644 index 0000000..fee1be2 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/max_items_spec.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ListColumn, ListColumnProperty +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .max_items import check_cell_max_items + + +class TestCheckCellMaxItems: + def test_returns_none_for_non_list_column(self): + column = StringColumn( + name="tags", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_max_items(column, mapping) + + assert result is None + + def test_returns_none_when_no_max_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_max_items(column, mapping) + + assert result is None + + def 
test_values_within_max_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(maxItems=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b", "x,y,z", "1"], + "target": [["a", "b"], ["x", "y", "z"], ["1"]], + } + ).lazy() + + result = check_cell_max_items(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_exceeding_max_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(maxItems=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b", "x,y,z,w", "1,2,3,4,5", "p,q"], + "target": [ + ["a", "b"], + ["x", "y", "z", "w"], + ["1", "2", "3", "4", "5"], + ["p", "q"], + ], + } + ).lazy() + + result = check_cell_max_items(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/maxItems" + assert result.error_template.maxItems == 3 + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_null_values_not_flagged(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(maxItems=2), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b", None, "x,y,z,w,v"], + "target": [["a", "b"], None, ["x", "y", "z", "w", "v"]], + } + ).lazy() + + result = check_cell_max_items(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def 
test_max_items_of_one(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(maxItems=1), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a", "b,c", "d"], + "target": [["a"], ["b", "c"], ["d"]], + } + ).lazy() + + result = check_cell_max_items(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 diff --git a/table/fairspec_table/actions/column/checks/max_length.py b/table/fairspec_table/actions/column/checks/max_length.py new file mode 100644 index 0000000..eeb6ffb --- /dev/null +++ b/table/fairspec_table/actions/column/checks/max_length.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellMaxLengthError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMaxLengthError + + +def check_cell_max_length(column: Column, mapping: CellMapping) -> ColumnCheck | None: + max_length = getattr(column.property, "maxLength", None) + if not max_length: + return None + + is_error_expr = mapping.source.str.len_chars().gt(max_length) + + error_template = CellMaxLengthError( + type="cell/maxLength", + columnName=column.name, + maxLength=max_length, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/max_length_spec.py b/table/fairspec_table/actions/column/checks/max_length_spec.py new file mode 100644 index 0000000..060d91c --- /dev/null +++ b/table/fairspec_table/actions/column/checks/max_length_spec.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import polars as pl +from 
fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .max_length import check_cell_max_length + + +class TestCheckCellMaxLength: + def test_returns_none_when_no_max_length(self): + column = StringColumn( + name="code", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_max_length(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_max_length_field(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_max_length(column, mapping) + + assert result is None + + def test_values_within_max_length(self): + column = StringColumn( + name="code", + type="string", + property=StringColumnProperty(maxLength=4), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["A123", "B456", "C789"], "target": ["A123", "B456", "C789"]} + ).lazy() + + result = check_cell_max_length(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_exceeding_max_length(self): + column = StringColumn( + name="username", + type="string", + property=StringColumnProperty(maxLength=8), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["bob", "alice", "christopher", "david"], + "target": ["bob", "alice", "christopher", "david"], + } + ).lazy() + + result = check_cell_max_length(column, mapping) + + assert result is not None + assert result.error_template.type == 
"cell/maxLength" + assert result.error_template.maxLength == 8 + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + assert errors["source"][0] == "christopher" diff --git a/table/fairspec_table/actions/column/checks/maximum.py b/table/fairspec_table/actions/column/checks/maximum.py new file mode 100644 index 0000000..22aca29 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/maximum.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable + +import polars as pl +from fairspec_metadata import CellExclusiveMaximumError, CellMaximumError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMaximumError | CellExclusiveMaximumError + + +def create_check_cell_maximum( + *, is_exclusive: bool = False +) -> Callable[[Column, CellMapping], ColumnCheck | None]: + def check_cell_maximum(column: Column, mapping: CellMapping) -> ColumnCheck | None: + if not hasattr(column.property, "maximum") and not hasattr( + column.property, "exclusiveMaximum" + ): + return None + + maximum = ( + getattr(column.property, "exclusiveMaximum", None) + if is_exclusive + else getattr(column.property, "maximum", None) + ) + if maximum is None: + return None + + is_error_expr = ( + mapping.target.ge(maximum) if is_exclusive else mapping.target.gt(maximum) + ) + + error_template: CellMaximumError | CellExclusiveMaximumError + if is_exclusive: + error_template = CellExclusiveMaximumError( + type="cell/exclusiveMaximum", + columnName=column.name, + maximum=str(maximum), + rowNumber=0, + cell="", + ) + else: + error_template = CellMaximumError( + type="cell/maximum", + columnName=column.name, + maximum=str(maximum), + rowNumber=0, + cell="", + ) + + return 
ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) + + return check_cell_maximum diff --git a/table/fairspec_table/actions/column/checks/maximum_spec.py b/table/fairspec_table/actions/column/checks/maximum_spec.py new file mode 100644 index 0000000..866f49a --- /dev/null +++ b/table/fairspec_table/actions/column/checks/maximum_spec.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .maximum import create_check_cell_maximum + + +class TestCheckCellMaximum: + def test_returns_none_when_no_maximum(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + check = create_check_cell_maximum() + + result = check(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_maximum_field(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + check = create_check_cell_maximum() + + result = check(column, mapping) + + assert result is None + + def test_values_within_maximum(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(maximum=50), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [10.5, 20.75, 30.0], "target": [10.5, 20.75, 30.0]} + ).lazy() + check = create_check_cell_maximum() + + result = check(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] 
https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_exceeding_maximum(self): + column = NumberColumn( + name="temperature", + type="number", + property=NumberColumnProperty(maximum=40), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [20.5, 30.0, 40.0, 50.5], "target": [20.5, 30.0, 40.0, 50.5]} + ).lazy() + check = create_check_cell_maximum() + + result = check(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/maximum" + assert result.error_template.maximum == "40.0" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_exclusive_maximum(self): + column = NumberColumn( + name="temperature", + type="number", + property=NumberColumnProperty(exclusiveMaximum=40), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [20.5, 30.0, 40.0, 50.5], "target": [20.5, 30.0, 40.0, 50.5]} + ).lazy() + check = create_check_cell_maximum(is_exclusive=True) + + result = check(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/exclusiveMaximum" + assert result.error_template.maximum == "40.0" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_integer_maximum(self): + column = IntegerColumn( + name="year", + type="integer", + property=IntegerColumnProperty(maximum=2022), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2020, 2021, 2023], "target": [2020, 2021, 2023]} + ).lazy() + check = create_check_cell_maximum() + + result = check(column, mapping) + + assert result is not None + assert 
result.error_template.maximum == "2022" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_integer_exclusive_maximum(self): + column = IntegerColumn( + name="year", + type="integer", + property=IntegerColumnProperty(exclusiveMaximum=2022), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2020, 2021, 2022, 2023], "target": [2020, 2021, 2022, 2023]} + ).lazy() + check = create_check_cell_maximum(is_exclusive=True) + + result = check(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 diff --git a/table/fairspec_table/actions/column/checks/min_items.py b/table/fairspec_table/actions/column/checks/min_items.py new file mode 100644 index 0000000..fe73cf1 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/min_items.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellMinItemsError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMinItemsError + + +def check_cell_min_items(column: Column, mapping: CellMapping) -> ColumnCheck | None: + if column.type != "list": + return None + + min_items = getattr(column.property, "minItems", None) + if min_items is None: + return None + + is_error_expr = mapping.target.list.len().lt(min_items) + + error_template = CellMinItemsError( + type="cell/minItems", + columnName=column.name, + minItems=min_items, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git 
a/table/fairspec_table/actions/column/checks/min_items_spec.py b/table/fairspec_table/actions/column/checks/min_items_spec.py new file mode 100644 index 0000000..08ca38c --- /dev/null +++ b/table/fairspec_table/actions/column/checks/min_items_spec.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ListColumn, ListColumnProperty +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .min_items import check_cell_min_items + + +class TestCheckCellMinItems: + def test_returns_none_for_non_list_column(self): + column = StringColumn( + name="tags", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_min_items(column, mapping) + + assert result is None + + def test_returns_none_when_no_min_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_min_items(column, mapping) + + assert result is None + + def test_values_meeting_min_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(minItems=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b,c", "x,y,z", "1,2,3"], + "target": [ + ["a", "b", "c"], + ["x", "y", "z"], + ["1", "2", "3"], + ], + } + ).lazy() + + result = check_cell_min_items(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_below_min_items(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(minItems=3), + ) + mapping = 
CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b,c", "x", "1,2", "p,q,r,s"], + "target": [ + ["a", "b", "c"], + ["x"], + ["1", "2"], + ["p", "q", "r", "s"], + ], + } + ).lazy() + + result = check_cell_min_items(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/minItems" + assert result.error_template.minItems == 3 + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_null_values_not_flagged(self): + column = ListColumn( + name="tags", + type="list", + property=ListColumnProperty(minItems=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["a,b,c", None, "x,y,z"], + "target": [["a", "b", "c"], None, ["x", "y", "z"]], + } + ).lazy() + + result = check_cell_min_items(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 diff --git a/table/fairspec_table/actions/column/checks/min_length.py b/table/fairspec_table/actions/column/checks/min_length.py new file mode 100644 index 0000000..12e7540 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/min_length.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellMinLengthError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMinLengthError + + +def check_cell_min_length(column: Column, mapping: CellMapping) -> ColumnCheck | None: + min_length = getattr(column.property, "minLength", None) + if not min_length: + return None + + is_error_expr 
= mapping.source.str.len_chars().lt(min_length) + + error_template = CellMinLengthError( + type="cell/minLength", + columnName=column.name, + minLength=min_length, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/min_length_spec.py b/table/fairspec_table/actions/column/checks/min_length_spec.py new file mode 100644 index 0000000..c76430d --- /dev/null +++ b/table/fairspec_table/actions/column/checks/min_length_spec.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .min_length import check_cell_min_length + + +class TestCheckCellMinLength: + def test_returns_none_when_no_min_length(self): + column = StringColumn( + name="code", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_min_length(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_min_length_field(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_min_length(column, mapping) + + assert result is None + + def test_values_meeting_min_length(self): + column = StringColumn( + name="code", + type="string", + property=StringColumnProperty(minLength=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["A123", "B456", "C789"], "target": ["A123", "B456", "C789"]} + ).lazy() + + result = check_cell_min_length(column, mapping) + + assert result is not None + errors: pl.DataFrame = 
table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_below_min_length(self): + column = StringColumn( + name="username", + type="string", + property=StringColumnProperty(minLength=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["bob", "a", "christopher", "ab"], + "target": ["bob", "a", "christopher", "ab"], + } + ).lazy() + + result = check_cell_min_length(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/minLength" + assert result.error_template.minLength == 3 + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_null_values_not_flagged(self): + column = StringColumn( + name="code", + type="string", + property=StringColumnProperty(minLength=3), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["ABCD", "A", None], "target": ["ABCD", "A", None]} + ).lazy() + + result = check_cell_min_length(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 diff --git a/table/fairspec_table/actions/column/checks/minimum.py b/table/fairspec_table/actions/column/checks/minimum.py new file mode 100644 index 0000000..5a6c308 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/minimum.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable + +import polars as pl +from fairspec_metadata import CellExclusiveMinimumError, CellMinimumError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + 
+@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMinimumError | CellExclusiveMinimumError + + +def create_check_cell_minimum( + *, is_exclusive: bool = False +) -> Callable[[Column, CellMapping], ColumnCheck | None]: + def check_cell_minimum(column: Column, mapping: CellMapping) -> ColumnCheck | None: + if not hasattr(column.property, "minimum") and not hasattr( + column.property, "exclusiveMinimum" + ): + return None + + minimum = ( + getattr(column.property, "exclusiveMinimum", None) + if is_exclusive + else getattr(column.property, "minimum", None) + ) + if minimum is None: + return None + + is_error_expr = ( + mapping.target.le(minimum) if is_exclusive else mapping.target.lt(minimum) + ) + + error_template: CellMinimumError | CellExclusiveMinimumError + if is_exclusive: + error_template = CellExclusiveMinimumError( + type="cell/exclusiveMinimum", + columnName=column.name, + minimum=str(minimum), + rowNumber=0, + cell="", + ) + else: + error_template = CellMinimumError( + type="cell/minimum", + columnName=column.name, + minimum=str(minimum), + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) + + return check_cell_minimum diff --git a/table/fairspec_table/actions/column/checks/minimum_spec.py b/table/fairspec_table/actions/column/checks/minimum_spec.py new file mode 100644 index 0000000..21c3de2 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/minimum_spec.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .minimum import create_check_cell_minimum + + +class TestCheckCellMinimum: + def test_returns_none_when_no_minimum(self): + column = 
NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + check = create_check_cell_minimum() + + result = check(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_minimum_field(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + check = create_check_cell_minimum() + + result = check(column, mapping) + + assert result is None + + def test_values_above_minimum(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(minimum=5), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [10.5, 20.75, 30.0], "target": [10.5, 20.75, 30.0]} + ).lazy() + check = create_check_cell_minimum() + + result = check(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_below_minimum(self): + column = NumberColumn( + name="temperature", + type="number", + property=NumberColumnProperty(minimum=10), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [20.5, 30.0, 40.0, 3.5], "target": [20.5, 30.0, 40.0, 3.5]} + ).lazy() + check = create_check_cell_minimum() + + result = check(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/minimum" + assert result.error_template.minimum == "10.0" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_exclusive_minimum(self): + column = NumberColumn( + name="temperature", + 
type="number", + property=NumberColumnProperty(exclusiveMinimum=10), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [20.5, 30.0, 10.0, 5.5], "target": [20.5, 30.0, 10.0, 5.5]} + ).lazy() + check = create_check_cell_minimum(is_exclusive=True) + + result = check(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/exclusiveMinimum" + assert result.error_template.minimum == "10.0" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_integer_minimum(self): + column = IntegerColumn( + name="year", + type="integer", + property=IntegerColumnProperty(minimum=2019), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2020, 2021, 2018], "target": [2020, 2021, 2018]} + ).lazy() + check = create_check_cell_minimum() + + result = check(column, mapping) + + assert result is not None + assert result.error_template.minimum == "2019" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_integer_exclusive_minimum(self): + column = IntegerColumn( + name="year", + type="integer", + property=IntegerColumnProperty(exclusiveMinimum=2019), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2020, 2021, 2019, 2018], "target": [2020, 2021, 2019, 2018]} + ).lazy() + check = create_check_cell_minimum(is_exclusive=True) + + result = check(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 diff --git 
a/table/fairspec_table/actions/column/checks/missing.py b/table/fairspec_table/actions/column/checks/missing.py new file mode 100644 index 0000000..947ae39 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/missing.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellMissingError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMissingError + + +def check_cell_missing(column: Column, mapping: CellMapping) -> ColumnCheck | None: + if column.nullable: + return None + + is_error_expr = mapping.target.is_null() + + error_template = CellMissingError( + type="cell/missing", + columnName=column.name, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/missing_spec.py b/table/fairspec_table/actions/column/checks/missing_spec.py new file mode 100644 index 0000000..e53ca18 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/missing_spec.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .missing import check_cell_missing + + +class TestCheckCellMissing: + def test_non_nullable_column_with_nulls(self): + column = NumberColumn( + name="id", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [1.0, None, 3.0], "target": [1.0, None, 3.0]} + ).lazy() + + result = check_cell_missing(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/missing" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: 
ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_nullable_column_returns_none(self): + column = NumberColumn( + name="id", + type="number", + nullable=True, + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_missing(column, mapping) + + assert result is None + + def test_non_nullable_column_no_nulls(self): + column = NumberColumn( + name="id", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [1.0, 2.0, 3.0], "target": [1.0, 2.0, 3.0]} + ).lazy() + + result = check_cell_missing(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_error_template_fields(self): + column = NumberColumn( + name="id", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_missing(column, mapping) + + assert result is not None + assert result.error_template.columnName == "id" + assert result.error_template.rowNumber == 0 + assert result.error_template.cell == "" diff --git a/table/fairspec_table/actions/column/checks/multiple_of.py b/table/fairspec_table/actions/column/checks/multiple_of.py new file mode 100644 index 0000000..65bbfdb --- /dev/null +++ b/table/fairspec_table/actions/column/checks/multiple_of.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellMultipleOfError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellMultipleOfError + 
+ +def check_cell_multiple_of(column: Column, mapping: CellMapping) -> ColumnCheck | None: + multiple_of = getattr(column.property, "multipleOf", None) + if multiple_of is None: + return None + + is_error_expr = (mapping.target % multiple_of).eq(0).not_() + + error_template = CellMultipleOfError( + type="cell/multipleOf", + columnName=column.name, + multipleOf=multiple_of, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/multiple_of_spec.py b/table/fairspec_table/actions/column/checks/multiple_of_spec.py new file mode 100644 index 0000000..f5bbb18 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/multiple_of_spec.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .multiple_of import check_cell_multiple_of + + +class TestCheckCellMultipleOf: + def test_returns_none_when_no_multiple_of(self): + column = IntegerColumn( + name="quantity", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_multiple_of(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_multiple_of_field(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_multiple_of(column, mapping) + + assert result is None + + def test_valid_integer_multiples(self): + column = IntegerColumn( + name="quantity", + type="integer", + property=IntegerColumnProperty(multipleOf=10), + ) + mapping = 
CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [10, 20, 30, 40], "target": [10, 20, 30, 40]} + ).lazy() + + result = check_cell_multiple_of(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_invalid_integer_values(self): + column = IntegerColumn( + name="quantity", + type="integer", + property=IntegerColumnProperty(multipleOf=10), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame({"source": [10, 15, 20], "target": [10, 15, 20]}).lazy() + + result = check_cell_multiple_of(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/multipleOf" + assert result.error_template.multipleOf == 10 + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_valid_number_multiples(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(multipleOf=2.5), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2.5, 5.0, 7.5], "target": [2.5, 5.0, 7.5]} + ).lazy() + + result = check_cell_multiple_of(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_invalid_number_values(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(multipleOf=2.5), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": [2.5, 3.7, 5.0], "target": [2.5, 3.7, 5.0]} + ).lazy() + + result = 
check_cell_multiple_of(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_multiple_of_one(self): + column = IntegerColumn( + name="count", + type="integer", + property=IntegerColumnProperty(multipleOf=1), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame({"source": [1, 2, 3, 4], "target": [1, 2, 3, 4]}).lazy() + + result = check_cell_multiple_of(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 diff --git a/table/fairspec_table/actions/column/checks/pattern.py b/table/fairspec_table/actions/column/checks/pattern.py new file mode 100644 index 0000000..2531243 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/pattern.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellPatternError +from fairspec_metadata import Column + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellPatternError + + +def check_cell_pattern(column: Column, mapping: CellMapping) -> ColumnCheck | None: + pattern = getattr(column.property, "pattern", None) + if not pattern: + return None + + is_error_expr = mapping.source.str.contains(pattern).not_() + + error_template = CellPatternError( + type="cell/pattern", + columnName=column.name, + pattern=pattern, + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/pattern_spec.py b/table/fairspec_table/actions/column/checks/pattern_spec.py new file mode 100644 
index 0000000..056aaa6 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/pattern_spec.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .pattern import check_cell_pattern + + +class TestCheckCellPattern: + def test_returns_none_when_no_pattern(self): + column = StringColumn( + name="email", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_pattern(column, mapping) + + assert result is None + + def test_returns_none_for_column_without_pattern_field(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_pattern(column, mapping) + + assert result is None + + def test_values_matching_pattern(self): + email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" + column = StringColumn( + name="email", + type="string", + property=StringColumnProperty(pattern=email_pattern), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["john@example.com", "alice@domain.org", "test@test.io"], + "target": ["john@example.com", "alice@domain.org", "test@test.io"], + } + ).lazy() + + result = check_cell_pattern(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_values_not_matching_pattern(self): + email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" + column = StringColumn( + name="email", + type="string", + 
property=StringColumnProperty(pattern=email_pattern), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": [ + "john@example.com", + "alice@domain", + "test.io", + "valid@email.com", + ], + "target": [ + "john@example.com", + "alice@domain", + "test.io", + "valid@email.com", + ], + } + ).lazy() + + result = check_cell_pattern(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/pattern" + assert result.error_template.pattern == email_pattern + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 diff --git a/table/fairspec_table/actions/column/checks/type.py b/table/fairspec_table/actions/column/checks/type.py new file mode 100644 index 0000000..703e807 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/type.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl +from fairspec_metadata import CellTypeError +from fairspec_metadata import Column, ColumnType + +from fairspec_table.models import CellMapping + + +@dataclass +class ColumnCheck: + is_error_expr: pl.Expr + error_template: CellTypeError + + +def check_cell_type(column: Column, mapping: CellMapping) -> ColumnCheck: + is_error_expr = mapping.source.is_not_null() & mapping.target.is_null() + + error_template = CellTypeError( + type="cell/type", + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=0, + cell="", + ) + + return ColumnCheck(is_error_expr=is_error_expr, error_template=error_template) diff --git a/table/fairspec_table/actions/column/checks/type_spec.py b/table/fairspec_table/actions/column/checks/type_spec.py new file mode 100644 index 0000000..c6d8399 --- /dev/null +++ b/table/fairspec_table/actions/column/checks/type_spec.py @@ -0,0 +1,133 @@ +from __future__ import annotations + 
+import polars as pl +from fairspec_metadata import ( + BooleanColumn, + BooleanColumnProperty, +) +from fairspec_metadata import ( + IntegerColumn, + IntegerColumnProperty, +) +from fairspec_metadata import ( + NumberColumn, + NumberColumnProperty, +) +from fairspec_metadata import ( + StringColumn, + StringColumnProperty, +) + +from fairspec_table.models import CellMapping + +from .type import check_cell_type + + +class TestCheckCellType: + def test_no_errors_for_valid_values(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["1", "2", "3", "4"], "target": [1, 2, 3, 4]} + ).lazy() + + result = check_cell_type(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_errors_for_invalid_integer_values(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["1", "bad", "3", "4x"], "target": [1, None, 3, None]} + ).lazy() + + result = check_cell_type(column, mapping) + + assert result is not None + assert result.error_template.type == "cell/type" + assert result.error_template.columnType == "integer" + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_errors_for_invalid_number_values(self): + column = NumberColumn( + name="price", + type="number", + property=NumberColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["10.5", "twenty", "30.75", "$40"], + "target": 
[10.5, None, 30.75, None], + } + ).lazy() + + result = check_cell_type(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 2 + + def test_errors_for_invalid_boolean_values(self): + column = BooleanColumn( + name="active", + type="boolean", + property=BooleanColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + { + "source": ["true", "yes", "false", "0", "1"], + "target": [True, None, False, False, True], + } + ).lazy() + + result = check_cell_type(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 1 + + def test_null_source_not_flagged(self): + column = IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + table = pl.DataFrame( + {"source": ["1", None, "3"], "target": [1, None, 3]} + ).lazy() + + result = check_cell_type(column, mapping) + + assert result is not None + errors: pl.DataFrame = table.filter(result.is_error_expr).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert len(errors) == 0 + + def test_error_template_fields(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + mapping = CellMapping(source=pl.col("source"), target=pl.col("target")) + + result = check_cell_type(column, mapping) + + assert result.error_template.columnName == "name" + assert result.error_template.columnType == "string" + assert result.error_template.rowNumber == 0 + assert result.error_template.cell == "" diff --git a/table/fairspec_table/actions/column/denarrow.py 
b/table/fairspec_table/actions/column/denarrow.py new file mode 100644 index 0000000..a8abae1 --- /dev/null +++ b/table/fairspec_table/actions/column/denarrow.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import cast + +import polars as pl + +from fairspec_metadata import CategoricalColumn, get_base_property_type + +from fairspec_table.helpers import get_categorical_values_and_labels +from fairspec_table.models import ColumnMapping + +INTEGER_VARIANTS = {pl.Int8, pl.Int16, pl.Int32, pl.Int64} +NUMBER_VARIANTS = {pl.Float32, pl.Float64} +STRING_VARIANTS = {pl.String, pl.Categorical} +ALPHANUMERIC_VARIANTS = INTEGER_VARIANTS | NUMBER_VARIANTS | STRING_VARIANTS + + +def denarrow_column(mapping: ColumnMapping, column_expr: pl.Expr) -> pl.Expr: + variant = mapping.source.type + + if isinstance(mapping.target, CategoricalColumn): + if variant in ALPHANUMERIC_VARIANTS: + target = mapping.target + values, labels = get_categorical_values_and_labels(target) + + polars_type: type[pl.DataType] = ( + pl.String + if get_base_property_type(cast(str, target.property.type)) == "string" + else pl.Int64 + ) + + if values: + return column_expr.replace_strict( + labels, values, default=None, return_dtype=polars_type + ) + + return column_expr diff --git a/table/fairspec_table/actions/column/denarrow_spec.py b/table/fairspec_table/actions/column/denarrow_spec.py new file mode 100644 index 0000000..4aaa87b --- /dev/null +++ b/table/fairspec_table/actions/column/denarrow_spec.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import CategoricalColumn +from fairspec_metadata import ( + IntegerCategoricalColumnProperty, + IntegerCategoryItem, + StringCategoricalColumnProperty, +) + +from fairspec_table.models import ColumnMapping, PolarsColumn + +from .denarrow import denarrow_column + + +class TestDenarrowCategorical: + @pytest.mark.parametrize( + "value, expected", + [ + ("red", "red"), + 
("green", "green"), + ], + ) + def test_string_categorical(self, value: str, expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.Categorical)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.Categorical), + target=CategoricalColumn( + name="name", + type="categorical", + property=StringCategoricalColumnProperty( + type="string", + format="categorical", + categories=["red", "green", "blue"], + ), + ), + ) + + result = table.select(denarrow_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + ("Low", 1), + ("High", 2), + ], + ) + def test_integer_categorical(self, value: str, expected: int): + table = pl.DataFrame([pl.Series("name", [value], pl.Categorical)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.Categorical), + target=CategoricalColumn( + name="name", + type="categorical", + property=IntegerCategoricalColumnProperty( + type="integer", + format="categorical", + categories=[ + IntegerCategoryItem(value=1, label="Low"), + IntegerCategoryItem(value=2, label="High"), + ], + ), + ), + ) + + result = table.select(denarrow_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/denormalize.py b/table/fairspec_table/actions/column/denormalize.py new file mode 100644 index 0000000..9bb3f43 --- /dev/null +++ b/table/fairspec_table/actions/column/denormalize.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Unpack + +import polars as pl + +from fairspec_table.models import ColumnMapping, DenormalizeColumnOptions + +from .denarrow import denarrow_column +from 
def desubstitute_column(
    mapping: ColumnMapping,
    column_expr: pl.Expr,
    **options: Unpack[DenormalizeColumnOptions],
) -> pl.Expr:
    """Substitute nulls with the column's first compatible missing-value marker.

    The marker kind ("string" or "number") is decided by the target column's
    base type and the ``nativeTypes`` option; declared missing values of the
    other kind are ignored. If no marker kind or no compatible marker exists,
    the expression is returned unchanged.

    :param mapping: source polars column paired with the target metadata column
    :param column_expr: expression selecting the (already denormalized) column
    :return: expression with nulls replaced, aliased to the target name
    """
    marker_kind = _get_missing_value_type(mapping.target, **options)
    if not marker_kind:
        return column_expr

    declared = mapping.target.property.missingValues or []
    # Declared items may be plain scalars or objects carrying a ``value`` field.
    raw_values = [getattr(item, "value", item) for item in declared]

    if marker_kind == "string":
        candidates = [raw for raw in raw_values if isinstance(raw, str)]
    else:
        candidates = [raw for raw in raw_values if isinstance(raw, (int, float))]

    if not candidates:
        return column_expr

    # The first declared compatible marker wins (matches the spec tests).
    replacement = candidates[0]
    return (
        pl.when(column_expr.is_null())
        .then(pl.lit(replacement))
        .otherwise(column_expr)
        .alias(mapping.target.name)
    )
_get_missing_value_type( + column: Column, + **options: Unpack[DenormalizeColumnOptions], +) -> str | None: + base_type = get_base_property_type(column.property.type) + + if base_type == "string": + return "string" + + if base_type in ("integer", "number"): + native_types = options.get("nativeTypes") + return "number" if native_types and base_type in native_types else "string" + + return None diff --git a/table/fairspec_table/actions/column/desubstitute_spec.py b/table/fairspec_table/actions/column/desubstitute_spec.py new file mode 100644 index 0000000..97366aa --- /dev/null +++ b/table/fairspec_table/actions/column/desubstitute_spec.py @@ -0,0 +1,355 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import IntegerColumn, NumberColumn, StringColumn +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import NumberColumnProperty +from fairspec_metadata import StringColumnProperty + +from fairspec_table.models import ColumnMapping, PolarsColumn + +from .desubstitute import desubstitute_column + + +class TestDesubstituteColumnString: + @pytest.mark.parametrize( + "value, expected", + [ + (None, None), + ("hello", "hello"), + ("value", "value"), + ], + ) + def test_no_missing_values(self, value: str | None, expected: str | None): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, None), + ("hello", "hello"), + ("value", "value"), + ], + ) + def test_empty_missing_values(self, value: str | None, 
expected: str | None): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=[]), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, "-"), + ("hello", "hello"), + ("value", "value"), + ], + ) + def test_missing_value_dash(self, value: str | None, expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["-"]), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, "x"), + ("hello", "hello"), + ("value", "value"), + ], + ) + def test_missing_value_x(self, value: str | None, expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["x"]), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, 
"n/a"), + ("value", "value"), + ("test", "test"), + ], + ) + def test_missing_value_na(self, value: str | None, expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["n/a"]), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, "-"), + ("value", "value"), + ("test", "test"), + ], + ) + def test_multiple_missing_values_uses_first(self, value: str | None, expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty( + type="string", missingValues=["-", "n/a", "null"] + ), + ), + ) + + result = table.select(desubstitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestDesubstituteColumnInteger: + @pytest.mark.parametrize( + "value, expected", + [ + (None, -1), + (0, 0), + (1, 1), + (42, 42), + ], + ) + def test_missing_value_neg1(self, value: int | None, expected: int): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-1]), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["integer"], + ) + 
) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, -999), + (0, 0), + (1, 1), + (100, 100), + ], + ) + def test_missing_value_neg999(self, value: int | None, expected: int): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-999]), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["integer"], + ) + ) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, -1), + (0, 0), + (42, 42), + ], + ) + def test_multiple_missing_values_uses_first(self, value: int | None, expected: int): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty( + type="integer", missingValues=[-1, -999, -9999] + ), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["integer"], + ) + ) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + +class TestDesubstituteColumnNumber: + @pytest.mark.parametrize( + "value, expected", + [ + (None, -1.0), + (0.0, 0.0), + (1.0, 1.0), + (42.0, 42.0), + ], + ) + def test_missing_value_float64(self, value: float | None, expected: float): + table = pl.DataFrame([pl.Series("value", [value], 
pl.Float64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Float64), + target=NumberColumn( + name="value", + type="number", + property=NumberColumnProperty(missingValues=[-1]), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["number"], + ) + ) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, -999.0), + (0.0, 0.0), + (1.0, 1.0), + (100.0, 100.0), + ], + ) + def test_missing_value_neg999_float64(self, value: float | None, expected: float): + table = pl.DataFrame([pl.Series("value", [value], pl.Float64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Float64), + target=NumberColumn( + name="value", + type="number", + property=NumberColumnProperty(missingValues=[-999]), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["number"], + ) + ) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (None, -1.0), + (0.0, 0.0), + (42.0, 42.0), + ], + ) + def test_multiple_missing_values_uses_first_float64( + self, value: float | None, expected: float + ): + table = pl.DataFrame([pl.Series("value", [value], pl.Float64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Float64), + target=NumberColumn( + name="value", + type="number", + property=NumberColumnProperty( + type="number", missingValues=[-1, -999, -9999] + ), + ), + ) + + result = table.select( + desubstitute_column( + mapping, + pl.col("value"), + nativeTypes=["number"], + ) + ) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] 
def inspect_text_column(
    column: Union[DurationColumn, WktColumn, WkbColumn],
    table: Table,
    *,
    parse: Callable[[str], object],
) -> list[CellError]:
    """Validate text-encoded cells (duration/WKT/WKB) by attempting to parse them.

    ``parse`` is expected to return a truthy parsed value on success and to
    return a falsy value (or raise) on failure; null cells are skipped.
    NOTE(review): a parser returning a *valid but falsy* result (e.g. 0 or "")
    would be reported as a type error here — confirm parsers never do that.

    :param column: target metadata column being validated
    :param table: lazy table containing the column
    :param parse: callable turning the cell text into a parsed value
    :return: one ``cell/type`` error per unparsable cell
    """
    errors: list[CellError] = []
    column_type = ColumnType(column.type)

    frame = cast(
        pl.DataFrame,
        table.with_row_index(NUMBER_COLUMN_NAME, 1)
        .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source"))
        .collect(),
    )

    for row in frame.to_dicts():
        source = row["source"]
        if source is None:
            continue

        parsed = None
        try:
            parsed = parse(source)
        except Exception:
            # A raising parser is treated like one that returned a falsy value.
            pass

        if not parsed:
            errors.append(
                CellTypeError(
                    type="cell/type",
                    cell=str(source),
                    columnName=column.name,
                    columnType=column_type,
                    rowNumber=row[NUMBER_COLUMN_NAME],
                )
            )

    return errors


def inspect_json_column(
    column: Union[ArrayColumn, ObjectColumn, GeojsonColumn, TopojsonColumn],
    table: Table,
    *,
    type_json_schema: dict[str, object] | None = None,
) -> list[CellError]:
    """Validate JSON-encoded cells: parse, shape-check, then schema-check.

    Emits ``cell/type`` when the cell is not valid JSON, does not have the
    expected top-level shape, or violates ``type_json_schema``; emits one
    ``cell/json`` per violation of the column property's own schema.

    :param column: target metadata column being validated
    :param table: lazy table containing the column
    :param type_json_schema: optional JSON Schema enforcing the column type
    :return: collected cell errors, at most a few per offending cell
    """
    errors: list[CellError] = []

    column_type = ColumnType(column.type)
    constraint_json_schema = column.property.model_dump(
        exclude_none=True, by_alias=True
    )

    # Arrays must decode to a JSON array; every other column kind must decode
    # to an object (get_is_object). Hoisted: the choice does not vary per row.
    check_compat: Callable[[object], bool] = (
        (lambda v: isinstance(v, list)) if column.type == "array" else get_is_object
    )

    frame = cast(
        pl.DataFrame,
        table.with_row_index(NUMBER_COLUMN_NAME, 1)
        .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source"))
        .collect(),
    )

    # Sentinel distinguishing "failed to parse" from legitimately falsy JSON.
    # Previously valid empty containers ([] for arrays, {} for objects) were
    # reported as type errors because the code tested `not target`.
    parse_failed = object()

    for row in frame.to_dicts():
        source = row["source"]
        if source is None:
            continue

        try:
            target = json.loads(source)
        except Exception:
            target = parse_failed

        if target is parse_failed or not check_compat(target):
            errors.append(
                CellTypeError(
                    type="cell/type",
                    cell=str(source),
                    columnName=column.name,
                    columnType=column_type,
                    rowNumber=row[NUMBER_COLUMN_NAME],
                )
            )
            continue

        if type_json_schema:
            if inspect_json(target, json_schema=type_json_schema):
                errors.append(
                    CellTypeError(
                        type="cell/type",
                        cell=str(source),
                        columnName=column.name,
                        columnType=column_type,
                        rowNumber=row[NUMBER_COLUMN_NAME],
                    )
                )
                continue

        if constraint_json_schema:
            for violation in inspect_json(target, json_schema=constraint_json_schema):
                errors.append(
                    CellJsonError(
                        type="cell/json",
                        cell=str(source),
                        columnName=column.name,
                        rowNumber=row[NUMBER_COLUMN_NAME],
                        message=violation["message"],
                        jsonPointer=violation["jsonPointer"],
                    )
                )

    return errors
+ table = pl.DataFrame({"duration": ["P1Y", "P2M", "P3D"]}).lazy() + column = DurationColumn( + name="duration", + type="duration", + property=DurationColumnProperty(), + ) + + errors = inspect_text_column( + column, table, parse=lambda s: s if s.startswith("P") else None + ) + + assert errors == [] + + def test_error_for_invalid_values(self): + table = pl.DataFrame({"duration": ["P1Y", "invalid", "P3D"]}).lazy() + column = DurationColumn( + name="duration", + type="duration", + property=DurationColumnProperty(), + ) + + errors = inspect_text_column( + column, table, parse=lambda s: s if s.startswith("P") else None + ) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].cell == "invalid" + assert errors[0].columnName == "duration" + assert errors[0].rowNumber == 2 + + def test_skip_null_values(self): + table = pl.DataFrame({"duration": ["P1Y", None, "P3D"]}).lazy() + column = DurationColumn( + name="duration", + type="duration", + property=DurationColumnProperty(), + ) + + errors = inspect_text_column( + column, table, parse=lambda s: s if s.startswith("P") else None + ) + + assert errors == [] + + def test_error_when_parse_raises(self): + def failing_parse(source: str) -> object: + raise ValueError("parse error") + + table = pl.DataFrame({"duration": ["bad"]}).lazy() + column = DurationColumn( + name="duration", + type="duration", + property=DurationColumnProperty(), + ) + + errors = inspect_text_column(column, table, parse=failing_parse) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + + +class TestInspectJsonColumn: + def test_no_errors_for_valid_object(self): + table = pl.DataFrame({"data": ['{"a": 1}']}).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert errors == [] + + def test_error_for_invalid_json(self): + table = pl.DataFrame({"data": ["not json"]}).lazy() + column = ObjectColumn( + 
name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].cell == "not json" + + def test_error_for_array_when_expecting_object(self): + table = pl.DataFrame({"data": ["[1, 2, 3]"]}).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + + def test_no_errors_for_valid_array(self): + table = pl.DataFrame({"data": ["[1, 2, 3]"]}).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert errors == [] + + def test_error_for_object_when_expecting_array(self): + table = pl.DataFrame({"data": ['{"a": 1}']}).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + + def test_skip_null_values(self): + table = pl.DataFrame({"data": [None, '{"a": 1}']}).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_json_column(column, table) + + assert errors == [] + + def test_type_json_schema_valid(self): + table = pl.DataFrame({"data": ['{"name": "test"}']}).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_json_column( + column, + table, + type_json_schema={ + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + ) + + assert errors == [] + + def test_type_json_schema_invalid(self): + table = pl.DataFrame({"data": ['{"name": 123}']}).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + 
def inspect_column(
    mapping: ColumnMapping,
    table: Table,
    *,
    max_errors: int,
) -> list[TableError]:
    """Validate one column: first the dtype pairing, then the cell values.

    Cell checks run only when the physical/declared type pairing is
    compatible — a mismatched dtype would make every cell check meaningless.

    :param mapping: source polars column paired with the target metadata column
    :param table: lazy table holding the data
    :param max_errors: cap on the number of cell errors collected
    """
    errors: list[TableError] = list(_inspect_type(mapping))
    if not errors:
        errors.extend(_inspect_cells(mapping, table, max_errors=max_errors))
    return errors


# Declared column types each polars dtype can satisfy; the first entry is also
# reported as the "actual" type on mismatch. Lookup is first-issubclass-match,
# so insertion order matters — keep it as-is.
COMPAT_MAPPING: dict[type[pl.DataType], list[str]] = {
    pl.Boolean: ["boolean"],
    pl.Categorical: ["string"],
    pl.Date: ["date"],
    pl.Datetime: ["date-time"],
    pl.Float32: ["number", "integer"],
    pl.Float64: ["number", "integer"],
    pl.Int8: ["integer", "number"],
    pl.Int16: ["integer", "number"],
    pl.Int32: ["integer", "number"],
    pl.Int64: ["integer", "number"],
    pl.List: ["list"],
    pl.String: ["unknown"],
    pl.Time: ["time"],
    pl.UInt8: ["integer", "number"],
    pl.UInt16: ["integer", "number"],
    pl.UInt32: ["integer", "number"],
    pl.UInt64: ["integer", "number"],
}


def _inspect_type(mapping: ColumnMapping) -> list[TableError]:
    """Return a ColumnTypeError when the polars dtype cannot hold the target type."""
    matched: list[str] = next(
        (
            types
            for dtype_cls, types in COMPAT_MAPPING.items()
            if issubclass(mapping.source.type, dtype_cls)
        ),
        [],
    )

    # "unknown" acts as a wildcard on both sides of the comparison.
    if set(matched) & {mapping.target.type, "unknown"}:
        return []

    actual = matched[0] if matched else "unknown"
    return [
        ColumnTypeError(
            type="column/type",
            columnName=mapping.target.name,
            expectedColumnType=ColumnType(mapping.target.type),
            actualColumnType=ColumnType(actual),
        )
    ]
def _inspect_cells_in_polars(
    mapping: ColumnMapping,
    table: Table,
    *,
    max_errors: int,
) -> list[TableError]:
    """Run scalar cell checks lazily in polars and materialize the failures.

    Each check contributes a boolean expression plus a JSON-serialized error
    template stored in the "error" column. Earlier checks take precedence:
    once a row carries an error it is never overwritten by a later check.
    At most ``max_errors`` failing rows are collected.
    """
    check_frame = table.with_row_index(NUMBER_COLUMN_NAME, 1).select(
        pl.col(NUMBER_COLUMN_NAME),
        normalize_column(mapping).alias("target"),
        normalize_column(mapping, keep_type=True).alias("source"),
        pl.lit(None).alias("error"),
    )

    # Order matters: earlier checks win when several would flag the same row.
    check_factories = [
        check_cell_type,
        check_cell_missing,
        check_cell_enum,
        check_cell_const,
        create_check_cell_minimum(),
        create_check_cell_maximum(),
        create_check_cell_minimum(is_exclusive=True),
        create_check_cell_maximum(is_exclusive=True),
        check_cell_multiple_of,
        check_cell_min_length,
        check_cell_max_length,
        check_cell_min_items,
        check_cell_max_items,
        check_cell_pattern,
    ]

    for factory in check_factories:
        cell_mapping = CellMapping(source=pl.col("source"), target=pl.col("target"))
        check = factory(mapping.target, cell_mapping)
        if not check:
            # Check does not apply to this column's declared constraints.
            continue

        template_json = json.dumps(check.error_template.model_dump(by_alias=True))
        check_frame = check_frame.with_columns(
            pl.when(pl.col("error").is_not_null())
            .then(pl.col("error"))
            .when(check.is_error_expr)
            .then(pl.lit(template_json))
            .otherwise(pl.lit(None))
            .alias("error"),
        )

    failures: pl.DataFrame = (  # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278
        check_frame.filter(pl.col("error").is_not_null())
        .drop("target")
        .head(max_errors)
        .collect()
    )

    adapter = TypeAdapter(CellError)
    errors: list[TableError] = []
    for row in failures.to_dicts():
        payload = json.loads(row["error"])
        payload["rowNumber"] = row[NUMBER_COLUMN_NAME]
        source_cell = row["source"]
        payload["cell"] = "" if source_cell is None else str(source_cell)
        errors.append(adapter.validate_python(payload))

    return errors
class TestInspectColumnType:
    def test_should_report_error_when_column_types_dont_match(self):
        """A boolean-typed frame column against an integer schema yields one type error."""
        lazy_table = pl.DataFrame({"id": [True, False, True]}).lazy()
        schema = TableSchema(properties={"id": IntegerColumnProperty()})

        errors = inspect_table(lazy_table, table_schema=schema)

        assert len(errors) == 1
        error = errors[0]
        assert isinstance(error, ColumnTypeError)
        assert error.columnName == "id"
        assert error.expectedColumnType == "integer"
        assert error.actualColumnType == "boolean"

    def test_should_not_error_when_column_types_match(self):
        """An int frame column against a number schema is compatible."""
        lazy_table = pl.DataFrame({"id": [1, 2, 3]}).lazy()
        schema = TableSchema(properties={"id": NumberColumnProperty()})

        errors = inspect_table(lazy_table, table_schema=schema)

        assert len(errors) == 0
for e in errors if isinstance(e, CellTypeError)] + assert len(cell_errors) == 1 + assert cell_errors[0].cell == "bad" + assert cell_errors[0].columnName == "number" + assert cell_errors[0].rowNumber == 2 + + +class TestInspectCellTypes: + def test_should_validate_string_to_integer_conversion_errors(self): + table = pl.DataFrame({"id": ["1", "bad", "3", "4x"]}).lazy() + table_schema = TableSchema(properties={"id": IntegerColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 2 + assert isinstance(errors[0], CellTypeError) + assert errors[0].cell == "bad" + assert errors[0].columnName == "id" + assert errors[0].columnType == "integer" + assert errors[0].rowNumber == 2 + assert isinstance(errors[1], CellTypeError) + assert errors[1].cell == "4x" + assert errors[1].rowNumber == 4 + + def test_should_validate_string_to_number_conversion_errors(self): + table = pl.DataFrame({"price": ["10.5", "twenty", "30.75", "$40"]}).lazy() + table_schema = TableSchema(properties={"price": NumberColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 2 + assert isinstance(errors[0], CellTypeError) + assert errors[0].cell == "twenty" + assert errors[0].columnName == "price" + assert errors[0].columnType == "number" + assert errors[0].rowNumber == 2 + assert isinstance(errors[1], CellTypeError) + assert errors[1].cell == "$40" + assert errors[1].rowNumber == 4 + + def test_should_validate_string_to_boolean_conversion_errors(self): + table = pl.DataFrame({"active": ["true", "yes", "false", "0", "1"]}).lazy() + table_schema = TableSchema(properties={"active": BooleanColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], CellTypeError) + assert errors[0].cell == "yes" + assert errors[0].columnName == "active" + assert errors[0].columnType == "boolean" + assert errors[0].rowNumber == 2 + + def 
test_should_validate_string_to_date_conversion_errors(self): + table = pl.DataFrame( + {"created": ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"]} + ).lazy() + table_schema = TableSchema(properties={"created": DateColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + cell_errors = [e for e in errors if isinstance(e, CellTypeError)] + assert len(cell_errors) == 3 + assert cell_errors[0].cell == "Jan 15, 2023" + assert cell_errors[0].rowNumber == 2 + assert cell_errors[1].cell == "20230115" + assert cell_errors[1].rowNumber == 3 + assert cell_errors[2].cell == "not-a-date" + assert cell_errors[2].rowNumber == 4 + + def test_should_validate_string_to_time_conversion_errors(self): + table = pl.DataFrame( + {"time": ["14:30:00", "2:30pm", "invalid", "14h30"]} + ).lazy() + table_schema = TableSchema(properties={"time": TimeColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + cell_errors = [e for e in errors if isinstance(e, CellTypeError)] + assert len(cell_errors) == 3 + assert cell_errors[0].cell == "2:30pm" + assert cell_errors[0].rowNumber == 2 + assert cell_errors[1].cell == "invalid" + assert cell_errors[1].rowNumber == 3 + assert cell_errors[2].cell == "14h30" + assert cell_errors[2].rowNumber == 4 + + def test_should_validate_string_to_datetime_conversion_errors(self): + table = pl.DataFrame( + { + "timestamp": [ + "2023-01-15T14:30:00", + "January 15, 2023 2:30 PM", + "2023-01-15 14:30", + "not-a-datetime", + ] + } + ).lazy() + table_schema = TableSchema(properties={"timestamp": DateTimeColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) > 0 + cell_errors = [e for e in errors if isinstance(e, CellTypeError)] + assert len(cell_errors) >= 2 + cells = {e.cell for e in cell_errors} + assert "January 15, 2023 2:30 PM" in cells + assert "not-a-datetime" in cells + + def test_should_pass_validation_when_all_cells_are_valid(self): + table = 
pl.DataFrame({"id": ["1", "2", "3", "4"]}).lazy() + table_schema = TableSchema(properties={"id": IntegerColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 0 + + def test_should_validate_with_non_string_source_data(self): + table = pl.DataFrame({"is_active": [True, False, True, False]}).lazy() + table_schema = TableSchema(properties={"is_active": BooleanColumnProperty()}) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 0 diff --git a/table/fairspec_table/actions/column/narrow.py b/table/fairspec_table/actions/column/narrow.py new file mode 100644 index 0000000..e14d7db --- /dev/null +++ b/table/fairspec_table/actions/column/narrow.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import CategoricalColumn + +from fairspec_table.helpers import get_categorical_values_and_labels +from fairspec_table.models import ColumnMapping + +INTEGER_VARIANTS = {pl.Int8, pl.Int16, pl.Int32, pl.Int64} +NUMBER_VARIANTS = {pl.Float32, pl.Float64} +STRING_VARIANTS = {pl.String, pl.Categorical} +ALPHANUMERIC_VARIANTS = INTEGER_VARIANTS | NUMBER_VARIANTS | STRING_VARIANTS + + +def narrow_column(mapping: ColumnMapping, column_expr: pl.Expr) -> pl.Expr: + variant = mapping.source.type + + if mapping.target.type == "boolean": + if variant in INTEGER_VARIANTS: + column_expr = ( + pl.when(column_expr.eq(1)) + .then(pl.lit(True)) + .when(column_expr.eq(0)) + .then(pl.lit(False)) + .otherwise(pl.lit(None)) + ) + + if mapping.target.type == "integer": + if variant in NUMBER_VARIANTS: + column_expr = ( + pl.when(column_expr.eq(column_expr.round(0))) + .then(column_expr.cast(pl.Int64)) + .otherwise(pl.lit(None)) + ) + + if isinstance(mapping.target, CategoricalColumn): + if variant in ALPHANUMERIC_VARIANTS: + values, labels = get_categorical_values_and_labels(mapping.target) + + if values: + return column_expr.replace_strict( + values, labels, 
default=None, return_dtype=pl.Categorical + ) + + return column_expr diff --git a/table/fairspec_table/actions/column/narrow_spec.py b/table/fairspec_table/actions/column/narrow_spec.py new file mode 100644 index 0000000..bd1f369 --- /dev/null +++ b/table/fairspec_table/actions/column/narrow_spec.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import CategoricalColumn, IntegerColumn +from fairspec_metadata import ( + IntegerCategoricalColumnProperty, + IntegerCategoryItem, + StringCategoricalColumnProperty, +) +from fairspec_metadata import IntegerColumnProperty + +from fairspec_table.models import ColumnMapping, PolarsColumn + +from .narrow import narrow_column + + +class TestNarrowToInteger: + def test_narrow_float_to_integer(self): + table = pl.DataFrame({"id": [1.0, 2.0, 3.0]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="id", type=pl.Float64), + target=IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ), + ) + + result = table.select(narrow_column(mapping, pl.col("id"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [ + {"id": 1}, + {"id": 2}, + {"id": 3}, + ] + + def test_non_integer_float_becomes_null(self): + table = pl.DataFrame({"id": [1.0, 2.0, 3.5]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="id", type=pl.Float64), + target=IntegerColumn( + name="id", + type="integer", + property=IntegerColumnProperty(), + ), + ) + + result = table.select(narrow_column(mapping, pl.col("id"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [ + {"id": 1}, + {"id": 2}, + {"id": None}, + ] + + +class TestNarrowToCategorical: + @pytest.mark.parametrize( + "cell, expected", + [ + ("red", "red"), + ("green", "green"), + ("yellow", 
None), + ], + ) + def test_string_categorical(self, cell: str, expected: str | None): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=CategoricalColumn( + name="name", + type="categorical", + property=StringCategoricalColumnProperty( + type="string", + format="categorical", + categories=["red", "green", "blue"], + ), + ), + ) + + result = table.select(narrow_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + (1, "Low"), + (2, "High"), + (3, None), + ], + ) + def test_integer_categorical(self, cell: int, expected: str | None): + table = pl.DataFrame([pl.Series("name", [cell], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.Int64), + target=CategoricalColumn( + name="name", + type="categorical", + property=IntegerCategoricalColumnProperty( + type="integer", + format="categorical", + categories=[ + IntegerCategoryItem(value=1, label="Low"), + IntegerCategoryItem(value=2, label="High"), + ], + ), + ), + ) + + result = table.select(narrow_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/normalize.py b/table/fairspec_table/actions/column/normalize.py new file mode 100644 index 0000000..20623d6 --- /dev/null +++ b/table/fairspec_table/actions/column/normalize.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_table.models import ColumnMapping + +from .narrow import narrow_column +from .parse import parse_column +from .substitute import substitute_column + + +def 
normalize_column( + mapping: ColumnMapping, + *, + keep_type: bool = False, +) -> pl.Expr: + column_expr = pl.col(mapping.source.name) + column_expr = substitute_column(mapping, column_expr) + + if not keep_type: + column_expr = parse_column(mapping, column_expr) + column_expr = narrow_column(mapping, column_expr) + + return column_expr.alias(mapping.target.name) diff --git a/table/fairspec_table/actions/column/parse.py b/table/fairspec_table/actions/column/parse.py new file mode 100644 index 0000000..cf1720f --- /dev/null +++ b/table/fairspec_table/actions/column/parse.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import ( + Base64Column, + BooleanColumn, + DateColumn, + DateTimeColumn, + DecimalColumn, + EmailColumn, + HexColumn, + IntegerColumn, + ListColumn, + NumberColumn, + TimeColumn, + UrlColumn, +) + +from fairspec_table.models import ColumnMapping + +from .types.base64 import parse_base64_column +from .types.boolean import parse_boolean_column +from .types.date import parse_date_column +from .types.date_time import parse_date_time_column +from .types.decimal import parse_decimal_column +from .types.email import parse_email_column +from .types.hex import parse_hex_column +from .types.integer import parse_integer_column +from .types.list import parse_list_column +from .types.number import parse_number_column +from .types.time import parse_time_column +from .types.url import parse_url_column + + +def parse_column(mapping: ColumnMapping, column_expr: pl.Expr) -> pl.Expr: + if mapping.source.type != pl.String: + return column_expr + + column = mapping.target + if isinstance(column, Base64Column): + return parse_base64_column(column, column_expr) + elif isinstance(column, BooleanColumn): + return parse_boolean_column(column, column_expr) + elif isinstance(column, DateColumn): + return parse_date_column(column, column_expr) + elif isinstance(column, DateTimeColumn): + return 
parse_date_time_column(column, column_expr) + elif isinstance(column, DecimalColumn): + return parse_decimal_column(column, column_expr) + elif isinstance(column, EmailColumn): + return parse_email_column(column, column_expr) + elif isinstance(column, HexColumn): + return parse_hex_column(column, column_expr) + elif isinstance(column, IntegerColumn): + return parse_integer_column(column, column_expr) + elif isinstance(column, ListColumn): + return parse_list_column(column, column_expr) + elif isinstance(column, NumberColumn): + return parse_number_column(column, column_expr) + elif isinstance(column, TimeColumn): + return parse_time_column(column, column_expr) + elif isinstance(column, UrlColumn): + return parse_url_column(column, column_expr) + else: + return column_expr diff --git a/table/fairspec_table/actions/column/stringify.py b/table/fairspec_table/actions/column/stringify.py new file mode 100644 index 0000000..b6b625b --- /dev/null +++ b/table/fairspec_table/actions/column/stringify.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import ( + BooleanColumn, + DateColumn, + DateTimeColumn, + DecimalColumn, + IntegerColumn, + ListColumn, + NumberColumn, + TimeColumn, + UnknownColumn, +) + +from fairspec_table.models import ColumnMapping + +from .types.boolean import stringify_boolean_column +from .types.date import stringify_date_column +from .types.date_time import stringify_date_time_column +from .types.decimal import stringify_decimal_column +from .types.integer import stringify_integer_column +from .types.list import stringify_list_column +from .types.number import stringify_number_column +from .types.time import stringify_time_column +from .types.unknown import stringify_unknown_column + + +def stringify_column(mapping: ColumnMapping, column_expr: pl.Expr) -> pl.Expr: + if mapping.source.type == pl.String: + return column_expr + + column = mapping.target + if isinstance(column, BooleanColumn): + 
return stringify_boolean_column(column, column_expr) + elif isinstance(column, DateColumn): + return stringify_date_column(column, column_expr) + elif isinstance(column, DateTimeColumn): + return stringify_date_time_column(column, column_expr) + elif isinstance(column, DecimalColumn): + return stringify_decimal_column(column, column_expr) + elif isinstance(column, IntegerColumn): + return stringify_integer_column(column, column_expr) + elif isinstance(column, ListColumn): + return stringify_list_column(column, column_expr) + elif isinstance(column, NumberColumn): + return stringify_number_column(column, column_expr) + elif isinstance(column, TimeColumn): + return stringify_time_column(column, column_expr) + elif isinstance(column, UnknownColumn): + return stringify_unknown_column(column, column_expr) + else: + return column_expr diff --git a/table/fairspec_table/actions/column/substitute.py b/table/fairspec_table/actions/column/substitute.py new file mode 100644 index 0000000..0f90489 --- /dev/null +++ b/table/fairspec_table/actions/column/substitute.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_table.models import ColumnMapping, PolarsColumn + + +def substitute_column(mapping: ColumnMapping, column_expr: pl.Expr) -> pl.Expr: + missing_value_type = _get_missing_value_type(mapping.source) + if not missing_value_type: + return column_expr + + flatten_missing_values = [ + item.value if hasattr(item, "value") else item + for item in (mapping.target.property.missingValues or []) + ] + + compatible_missing_values = ( + [ + value + for value in flatten_missing_values + if isinstance(value, str) + if missing_value_type == "string" + ] + if missing_value_type == "string" + else [ + value for value in flatten_missing_values if isinstance(value, (int, float)) + ] + ) + + if not compatible_missing_values: + return column_expr + + return ( + pl.when(column_expr.is_in(compatible_missing_values)) + .then(pl.lit(None)) + 
.otherwise(column_expr) + .alias(mapping.target.name) + ) + + +def _get_missing_value_type(polars_column: PolarsColumn) -> str | None: + polars_type = polars_column.type + + if polars_type == pl.String: + return "string" + if polars_type in (pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Float32, pl.Float64): + return "number" + + return None diff --git a/table/fairspec_table/actions/column/substitute_spec.py b/table/fairspec_table/actions/column/substitute_spec.py new file mode 100644 index 0000000..f8382c7 --- /dev/null +++ b/table/fairspec_table/actions/column/substitute_spec.py @@ -0,0 +1,274 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import IntegerColumn, StringColumn +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty + +from fairspec_table.models import ColumnMapping, PolarsColumn + +from .substitute import substitute_column + + +class TestSubstituteColumnString: + @pytest.mark.parametrize( + "cell, expected", + [ + ("x", None), + ("-", "-"), + ("", ""), + ("value", "value"), + ], + ) + def test_missing_value_x(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["x"]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("-", None), + ("", ""), + ("x", "x"), + ("value", "value"), + ], + ) + def test_missing_value_dash(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + 
target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["-"]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("", None), + ("-", "-"), + ("x", "x"), + ("value", "value"), + ], + ) + def test_missing_value_empty(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=[""]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("n/a", None), + ("-", "-"), + ("", ""), + ("value", "value"), + ], + ) + def test_missing_value_na(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["n/a"]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("-", None), + ("x", None), + ("", ""), + ("value", "value"), + ], + ) + def test_multiple_missing_values(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + mapping = ColumnMapping( + 
source=PolarsColumn(name="name", type=pl.String), + target=StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["-", "x"]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestSubstituteColumnInteger: + @pytest.mark.parametrize( + "value, expected", + [ + (-1, None), + (0, 0), + (1, 1), + (42, 42), + ], + ) + def test_missing_value_int64(self, value: int, expected: int | None): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-1]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("value"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (-999, None), + (0, 0), + (1, 1), + (100, 100), + ], + ) + def test_missing_value_neg999_int64(self, value: int, expected: int | None): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-999]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("value"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (-1, None), + (-99, None), + (0, 0), + (42, 42), + ], + ) + def 
test_multiple_missing_values_int64(self, value: int, expected: int | None): + table = pl.DataFrame([pl.Series("value", [value], pl.Int64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Int64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-1, -99]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("value"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"value": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (-1, None), + (0, 0), + (1, 1), + (42, 42), + ], + ) + def test_missing_value_float64(self, value: int, expected: int | None): + table = pl.DataFrame([pl.Series("value", [float(value)], pl.Float64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Float64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-1]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("value"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + row = frame.to_dicts()[0] + + if expected is None: + assert row["value"] is None + else: + assert row["value"] == float(expected) + + @pytest.mark.parametrize( + "value, expected", + [ + (-1, None), + (-99, None), + (0, 0), + (42, 42), + ], + ) + def test_multiple_missing_values_float64(self, value: int, expected: int | None): + table = pl.DataFrame([pl.Series("value", [float(value)], pl.Float64)]).lazy() + mapping = ColumnMapping( + source=PolarsColumn(name="value", type=pl.Float64), + target=IntegerColumn( + name="value", + type="integer", + property=IntegerColumnProperty(missingValues=[-1, -99]), + ), + ) + + result = table.select(substitute_column(mapping, pl.col("value"))) + frame: pl.DataFrame = result.collect() # ty: 
ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + row = frame.to_dicts()[0] + + if expected is None: + assert row["value"] is None + else: + assert row["value"] == float(expected) diff --git a/table/fairspec_table/actions/column/types/__init__.py b/table/fairspec_table/actions/column/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/column/types/array.py b/table/fairspec_table/actions/column/types/array.py new file mode 100644 index 0000000..e55b4eb --- /dev/null +++ b/table/fairspec_table/actions/column/types/array.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import json + +from fairspec_metadata import ( + ArrayColumn, + CellJsonError, + CellTypeError, + ColumnType, + inspect_json, +) + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_array_column( + column: ArrayColumn, table: Table +) -> list[CellTypeError | CellJsonError]: + errors: list[CellTypeError | CellJsonError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + constraint_json_schema = column.property.model_dump( + exclude_none=True, by_alias=True + ) + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = json.loads(row["source"]) + except (json.JSONDecodeError, TypeError): + pass + + if target is None or not isinstance(target, list): + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + continue + + if constraint_json_schema: + constraint_errors = inspect_json(target, json_schema=constraint_json_schema) + for error in 
constraint_errors: + errors.append( + CellJsonError( + type="cell/json", + cell=str(row["source"]), + columnName=column.name, + rowNumber=row[NUMBER_COLUMN_NAME], + message=error["message"], + jsonPointer=error["jsonPointer"], + ) + ) + + return errors diff --git a/table/fairspec_table/actions/column/types/array_spec.py b/table/fairspec_table/actions/column/types/array_spec.py new file mode 100644 index 0000000..d4a6367 --- /dev/null +++ b/table/fairspec_table/actions/column/types/array_spec.py @@ -0,0 +1,279 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import ArrayColumn, ArrayColumnProperty +from fairspec_metadata import CellJsonError, CellTypeError + +from .array import inspect_array_column + + +class TestInspectArrayColumn: + def test_valid_json_arrays(self): + table = pl.DataFrame( + { + "tags": ['["tag1","tag2"]', "[1,2,3]", '["a","b","c"]'], + } + ).lazy() + column = ArrayColumn( + name="tags", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 0 + + def test_empty_arrays(self): + table = pl.DataFrame( + { + "items": ["[]", "[]", "[]"], + } + ).lazy() + column = ArrayColumn( + name="items", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 0 + + def test_null_values(self): + table = pl.DataFrame( + { + "data": ['["value"]', None, "[1,2,3]"], + } + ).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 0 + + def test_json_objects_error(self): + table = pl.DataFrame( + { + "data": ["[1,2,3]", '{"key":"value"}', '["a","b"]'], + } + ).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 1 + assert isinstance(errors[0], 
CellTypeError) + assert errors[0].columnName == "data" + assert errors[0].columnType == "array" + assert errors[0].rowNumber == 2 + assert errors[0].cell == '{"key":"value"}' + + def test_invalid_json_error(self): + table = pl.DataFrame( + { + "data": ['["valid"]', "invalid json", "[1,2,3]", "[broken"], + } + ).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + type_errors = [e for e in errors if e.type == "cell/type"] + assert len(type_errors) == 2 + assert any(e.rowNumber == 2 and e.cell == "invalid json" for e in type_errors) + assert any(e.rowNumber == 4 and e.cell == "[broken" for e in type_errors) + + def test_nested_arrays(self): + table = pl.DataFrame( + { + "matrix": [ + "[[1,2],[3,4]]", + "[[5,6],[7,8]]", + '[["a","b"],["c","d"]]', + ], + } + ).lazy() + column = ArrayColumn( + name="matrix", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 0 + + def test_empty_strings_error(self): + table = pl.DataFrame( + { + "data": ['["valid"]', "", "[1,2,3]"], + } + ).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].rowNumber == 2 + assert errors[0].cell == "" + + def test_json_primitives_error(self): + table = pl.DataFrame( + { + "data": ['"string"', "123", "true", "false", "null"], + } + ).lazy() + column = ArrayColumn( + name="data", + type="array", + property=ArrayColumnProperty(), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 5 + assert errors[0].rowNumber == 1 + assert errors[0].cell == '"string"' + assert errors[1].rowNumber == 2 + assert errors[1].cell == "123" + assert errors[2].rowNumber == 3 + assert errors[2].cell == "true" + assert errors[3].rowNumber == 4 + 
assert errors[3].cell == "false" + assert errors[4].rowNumber == 5 + assert errors[4].cell == "null" + + def test_valid_arrays_matching_json_schema(self): + table = pl.DataFrame( + { + "scores": ["[80,90,100]", "[75,85,95]", "[90,95,100]"], + } + ).lazy() + column = ArrayColumn( + name="scores", + type="array", + property=ArrayColumnProperty( + items={"type": "number"}, + minItems=3, + maxItems=3, + ), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 0 + + def test_arrays_not_matching_json_schema(self): + table = pl.DataFrame( + { + "numbers": [ + "[1,2,3]", + '["not","numbers"]', + "[1]", + "[4,5,6]", + ], + } + ).lazy() + column = ArrayColumn( + name="numbers", + type="array", + property=ArrayColumnProperty( + items={"type": "number"}, + minItems=2, + ), + ) + + errors = inspect_array_column(column, table) + + json_errors = [e for e in errors if isinstance(e, CellJsonError)] + assert len(json_errors) == 3 + assert json_errors[0].rowNumber == 2 + assert json_errors[0].cell == '["not","numbers"]' + assert json_errors[0].message == "'not' is not of type 'number'" + assert json_errors[0].jsonPointer == "/0" + assert json_errors[1].rowNumber == 2 + assert json_errors[1].message == "'numbers' is not of type 'number'" + assert json_errors[1].jsonPointer == "/1" + assert json_errors[2].rowNumber == 3 + assert json_errors[2].cell == "[1]" + assert json_errors[2].message == "[1] is too short" + assert json_errors[2].jsonPointer == "/" + + def test_complex_json_schema_with_array_of_objects(self): + table = pl.DataFrame( + { + "users": [ + '[{"name":"John","age":30},{"name":"Jane","age":25}]', + '[{"name":"Bob","age":"invalid"}]', + ], + } + ).lazy() + column = ArrayColumn( + name="users", + type="array", + property=ArrayColumnProperty( + items={ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "number"}, + }, + "required": ["name", "age"], + }, + ), + ) + + errors = inspect_array_column(column, table) + + 
assert len(errors) == 1 + assert isinstance(errors[0], CellJsonError) + assert errors[0].rowNumber == 2 + assert errors[0].cell == '[{"name":"Bob","age":"invalid"}]' + assert errors[0].message == "'invalid' is not of type 'number'" + assert errors[0].jsonPointer == "/0/age" + + def test_json_schema_with_unique_items(self): + table = pl.DataFrame( + { + "tags": [ + '["unique","values"]', + '["duplicate","duplicate"]', + ], + } + ).lazy() + column = ArrayColumn( + name="tags", + type="array", + property=ArrayColumnProperty( + items={"type": "string"}, + uniqueItems=True, + ), + ) + + errors = inspect_array_column(column, table) + + assert len(errors) == 1 + assert isinstance(errors[0], CellJsonError) + assert errors[0].rowNumber == 2 + assert errors[0].cell == '["duplicate","duplicate"]' + assert "has non-unique elements" in errors[0].message + assert errors[0].jsonPointer == "/" diff --git a/table/fairspec_table/actions/column/types/base64.py b/table/fairspec_table/actions/column/types/base64.py new file mode 100644 index 0000000..a27c44f --- /dev/null +++ b/table/fairspec_table/actions/column/types/base64.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import Base64Column + +from fairspec_table.settings import BASE64_REGEX + + +def parse_base64_column(column: Base64Column, column_expr: pl.Expr) -> pl.Expr: + return ( + pl.when(column_expr.str.contains(BASE64_REGEX)) + .then(column_expr) + .otherwise(pl.lit(None)) + .alias(column.name) + ) diff --git a/table/fairspec_table/actions/column/types/base64_spec.py b/table/fairspec_table/actions/column/types/base64_spec.py new file mode 100644 index 0000000..4c840da --- /dev/null +++ b/table/fairspec_table/actions/column/types/base64_spec.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import Base64Column, Base64ColumnProperty + +from .base64 import parse_base64_column + + +COLUMN = Base64Column( 
+ name="name", + type="base64", + property=Base64ColumnProperty(), +) + + +class TestParseBase64Column: + @pytest.mark.parametrize( + "cell, expected", + [ + ("SGVsbG8gV29ybGQ=", "SGVsbG8gV29ybGQ="), + ("YWJjZGVm", "YWJjZGVm"), + ("!!!invalid!!!", None), + ("not base64", None), + ], + ) + def test_parse(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + result = table.select(parse_base64_column(COLUMN, pl.col("name"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/boolean.py b/table/fairspec_table/actions/column/types/boolean.py new file mode 100644 index 0000000..585bcbc --- /dev/null +++ b/table/fairspec_table/actions/column/types/boolean.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import BooleanColumn + +DEFAULT_TRUE_VALUES = ["true", "True", "TRUE", "1"] +DEFAULT_FALSE_VALUES = ["false", "False", "FALSE", "0"] + +DEFAULT_TRUE_VALUE = "true" +DEFAULT_FALSE_VALUE = "false" + + +def parse_boolean_column(column: BooleanColumn, column_expr: pl.Expr) -> pl.Expr: + true_values = column.property.trueValues or DEFAULT_TRUE_VALUES + false_values = column.property.falseValues or DEFAULT_FALSE_VALUES + + for value in true_values: + column_expr = column_expr.str.replace(f"^{value}$", "1", literal=False) + for value in false_values: + column_expr = column_expr.str.replace(f"^{value}$", "0", literal=False) + + column_expr = column_expr.cast(pl.Int8, strict=False) + + return ( + pl.when(column_expr.eq(1)) + .then(pl.lit(True)) + .when(column_expr.eq(0)) + .then(pl.lit(False)) + .otherwise(pl.lit(None)) + .alias(column.name) + ) + + +def stringify_boolean_column(column: BooleanColumn, column_expr: pl.Expr) -> pl.Expr: + true_value = ( + column.property.trueValues[0] + if column.property.trueValues + else 
DEFAULT_TRUE_VALUE + ) + false_value = ( + column.property.falseValues[0] + if column.property.falseValues + else DEFAULT_FALSE_VALUE + ) + + return ( + pl.when(column_expr.eq(pl.lit(True))) + .then(pl.lit(true_value)) + .otherwise(pl.lit(false_value)) + .alias(column.name) + ) diff --git a/table/fairspec_table/actions/column/types/boolean_spec.py b/table/fairspec_table/actions/column/types/boolean_spec.py new file mode 100644 index 0000000..63415c6 --- /dev/null +++ b/table/fairspec_table/actions/column/types/boolean_spec.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import BooleanColumn, BooleanColumnProperty + +from .boolean import parse_boolean_column, stringify_boolean_column + + +class TestParseBooleanColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("true", True), + ("True", True), + ("TRUE", True), + ("1", True), + ("false", False), + ("False", False), + ("FALSE", False), + ("0", False), + ("", None), + ("invalid", None), + ("truthy", None), + ("falsy", None), + ("2", None), + ("-100", None), + ("t", None), + ("f", None), + ("3.14", None), + ], + ) + def test_default(self, cell: str, expected: bool | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty(), + ) + + result = table.select(parse_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("Y", True), + ("y", True), + ("yes", True), + ("true", None), + ], + ) + def test_true_values(self, cell: str, expected: bool | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + trueValues=["Y", "y", "yes"], + ), + ) + + result = 
table.select(parse_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("N", False), + ("n", False), + ("no", False), + ("false", None), + ], + ) + def test_false_values(self, cell: str, expected: bool | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + falseValues=["N", "n", "no"], + ), + ) + + result = table.select(parse_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("oui", True), + ("si", True), + ("non", False), + ("no", False), + ], + ) + def test_true_values_and_false_values(self, cell: str, expected: bool | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + trueValues=["oui", "si"], + falseValues=["non", "no"], + ), + ) + + result = table.select(parse_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyBooleanColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (True, "true"), + (False, "false"), + ], + ) + def test_default(self, value: bool, expected: str): + table = pl.DataFrame({"name": [value]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty(), + ) + + result = table.select(stringify_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: 
ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (True, "Y"), + (False, "false"), + ], + ) + def test_true_values(self, value: bool, expected: str): + table = pl.DataFrame({"name": [value]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + trueValues=["Y", "y", "yes"], + ), + ) + + result = table.select(stringify_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (True, "true"), + (False, "N"), + ], + ) + def test_false_values(self, value: bool, expected: str): + table = pl.DataFrame({"name": [value]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + falseValues=["N", "n", "no"], + ), + ) + + result = table.select(stringify_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (True, "oui"), + (False, "non"), + ], + ) + def test_true_values_and_false_values(self, value: bool, expected: str): + table = pl.DataFrame({"name": [value]}).lazy() + column = BooleanColumn( + name="name", + type="boolean", + property=BooleanColumnProperty( + trueValues=["oui", "si"], + falseValues=["non", "no"], + ), + ) + + result = table.select(stringify_boolean_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/date.py 
b/table/fairspec_table/actions/column/types/date.py new file mode 100644 index 0000000..bf773ff --- /dev/null +++ b/table/fairspec_table/actions/column/types/date.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import DateColumn + +DEFAULT_FORMAT = "%Y-%m-%d" + + +def parse_date_column(column: DateColumn, column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return column_expr.str.strptime(pl.Date, fmt, strict=False) + + +def stringify_date_column(column: DateColumn, column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return column_expr.dt.strftime(fmt) diff --git a/table/fairspec_table/actions/column/types/date_spec.py b/table/fairspec_table/actions/column/types/date_spec.py new file mode 100644 index 0000000..b45e81a --- /dev/null +++ b/table/fairspec_table/actions/column/types/date_spec.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import datetime + +import polars as pl +import pytest + +from fairspec_metadata import DateColumn, DateColumnProperty + +from .date import parse_date_column, stringify_date_column + + +class TestParseDateColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("2019-01-01", datetime.date(2019, 1, 1)), + ("10th Jan 1969", None), + ("invalid", None), + ("", None), + ], + ) + def test_default(self, cell: str, expected: datetime.date | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(), + ) + + result = table.select(parse_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("21/11/2006", datetime.date(2006, 11, 21)), + ("invalid", None), + ("", None), + ], + ) + def 
test_temporal_format_dmy(self, cell: str, expected: datetime.date | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(temporalFormat="%d/%m/%Y"), + ) + + result = table.select(parse_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("2006/11/21", datetime.date(2006, 11, 21)), + ], + ) + def test_temporal_format_ymd(self, cell: str, expected: datetime.date): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(temporalFormat="%Y/%m/%d"), + ) + + result = table.select(parse_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("21/11/06", None), + ], + ) + def test_invalid_temporal_format(self, cell: str, expected: datetime.date | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(temporalFormat="invalid"), + ) + + result = table.select(parse_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyDateColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (datetime.date(2019, 1, 1), "2019-01-01"), + (datetime.date(2006, 11, 21), "2006-11-21"), + ], + ) + def test_default(self, value: datetime.date, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Date)}).lazy() + column = DateColumn( + name="name", 
+ type="date", + property=DateColumnProperty(), + ) + + result = table.select(stringify_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (datetime.date(2006, 11, 21), "21/11/2006"), + ], + ) + def test_temporal_format_dmy(self, value: datetime.date, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Date)}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(temporalFormat="%d/%m/%Y"), + ) + + result = table.select(stringify_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (datetime.date(2006, 11, 21), "2006/11/21"), + ], + ) + def test_temporal_format_ymd(self, value: datetime.date, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Date)}).lazy() + column = DateColumn( + name="name", + type="date", + property=DateColumnProperty(temporalFormat="%Y/%m/%d"), + ) + + result = table.select(stringify_date_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/date_time.py b/table/fairspec_table/actions/column/types/date_time.py new file mode 100644 index 0000000..5d41653 --- /dev/null +++ b/table/fairspec_table/actions/column/types/date_time.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import DateTimeColumn + +DEFAULT_FORMAT = "%Y-%m-%dT%H:%M:%S" + + +def parse_date_time_column(column: DateTimeColumn, 
column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return column_expr.str.strptime(pl.Datetime, fmt, strict=False) + + +def stringify_date_time_column(column: DateTimeColumn, column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return column_expr.dt.strftime(fmt) diff --git a/table/fairspec_table/actions/column/types/date_time_spec.py b/table/fairspec_table/actions/column/types/date_time_spec.py new file mode 100644 index 0000000..2cdfe43 --- /dev/null +++ b/table/fairspec_table/actions/column/types/date_time_spec.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import datetime + +import polars as pl +import pytest + +from fairspec_metadata import DateTimeColumn, DateTimeColumnProperty + +from .date_time import parse_date_time_column, stringify_date_time_column + + +class TestParseDateTimeColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ( + "2014-01-01T06:00:00", + datetime.datetime(2014, 1, 1, 6, 0, 0), + ), + ("Mon 1st Jan 2014 9 am", None), + ("invalid", None), + ("", None), + ], + ) + def test_default(self, cell: str, expected: datetime.datetime | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateTimeColumn( + name="name", + type="date-time", + property=DateTimeColumnProperty(), + ) + + result = table.select(parse_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ( + "21/11/2006 16:30", + datetime.datetime(2006, 11, 21, 16, 30), + ), + ("invalid", None), + ("", None), + ], + ) + def test_temporal_format(self, cell: str, expected: datetime.datetime | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateTimeColumn( + name="name", + type="date-time", + property=DateTimeColumnProperty( + format="date-time", + 
temporalFormat="%d/%m/%Y %H:%M", + ), + ) + + result = table.select(parse_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("21/11/06 16:30", None), + ], + ) + def test_invalid_temporal_format( + self, cell: str, expected: datetime.datetime | None + ): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DateTimeColumn( + name="name", + type="date-time", + property=DateTimeColumnProperty(temporalFormat="invalid"), + ) + + result = table.select(parse_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyDateTimeColumn: + @pytest.mark.parametrize( + "value, expected", + [ + ( + datetime.datetime(2014, 1, 1, 6, 0, 0), + "2014-01-01T06:00:00", + ), + ( + datetime.datetime(2006, 11, 21, 16, 30, 0), + "2006-11-21T16:30:00", + ), + ], + ) + def test_default(self, value: datetime.datetime, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Datetime)}).lazy() + column = DateTimeColumn( + name="name", + type="date-time", + property=DateTimeColumnProperty(), + ) + + result = table.select(stringify_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + ( + datetime.datetime(2006, 11, 21, 16, 30, 0), + "21/11/2006 16:30", + ), + ], + ) + def test_temporal_format_dmy_hm(self, value: datetime.datetime, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Datetime)}).lazy() + column = DateTimeColumn( + name="name", + 
type="date-time", + property=DateTimeColumnProperty( + format="date-time", + temporalFormat="%d/%m/%Y %H:%M", + ), + ) + + result = table.select(stringify_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + ( + datetime.datetime(2014, 1, 1, 6, 0, 0), + "2014/01/01T06:00:00", + ), + ], + ) + def test_temporal_format_ymd_hms(self, value: datetime.datetime, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Datetime)}).lazy() + column = DateTimeColumn( + name="name", + type="date-time", + property=DateTimeColumnProperty( + format="date-time", + temporalFormat="%Y/%m/%dT%H:%M:%S", + ), + ) + + result = table.select(stringify_date_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/decimal.py b/table/fairspec_table/actions/column/types/decimal.py new file mode 100644 index 0000000..c9d2741 --- /dev/null +++ b/table/fairspec_table/actions/column/types/decimal.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import re + +import polars as pl + +from fairspec_metadata import DecimalColumn + + +def parse_decimal_column(column: DecimalColumn, column_expr: pl.Expr) -> pl.Expr: + decimal_char = column.property.decimalChar or "." + group_char = column.property.groupChar or "" + with_text = column.property.withText + + if group_char == "." 
and decimal_char == ",":
+        column_expr = column_expr.str.replace_all(",", "###DECIMAL###")
+        column_expr = column_expr.str.replace_all(r"\.", "")
+        column_expr = column_expr.str.replace_all("###DECIMAL###", ".")
+    else:
+        if group_char:
+            escaped_group_char = re.escape(group_char)
+            column_expr = column_expr.str.replace_all(escaped_group_char, "")
+
+        if decimal_char and decimal_char != ".":
+            column_expr = column_expr.str.replace_all(re.escape(decimal_char), ".")
+
+    if with_text:
+        column_expr = column_expr.str.extract(r"(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", 1)  # take the first numeric token; blanket char-stripping (old approach) spliced digits from unrelated text and broke "1E5"/"1e+5"
+
+    column_expr = column_expr.cast(pl.Decimal(scale=18), strict=False)
+    return column_expr
+
+
+def stringify_decimal_column(_column: DecimalColumn, column_expr: pl.Expr) -> pl.Expr:
+    column_expr = column_expr.cast(pl.String)
+    return column_expr
diff --git a/table/fairspec_table/actions/column/types/decimal_spec.py b/table/fairspec_table/actions/column/types/decimal_spec.py
new file mode 100644
index 0000000..65b5035
--- /dev/null
+++ b/table/fairspec_table/actions/column/types/decimal_spec.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+from decimal import Decimal
+
+import polars as pl
+import pytest
+
+from fairspec_metadata import DecimalColumn, DecimalColumnProperty
+
+from .decimal import parse_decimal_column, stringify_decimal_column
+
+
+class TestParseDecimalColumn:
+    @pytest.mark.parametrize(
+        "cell, expected",
+        [
+            ("1", Decimal("1")),
+            ("2", Decimal("2")),
+            ("1000", Decimal("1000")),
+            ("1.5", Decimal("1.5")),
+            ("4.14159", Decimal("4.14159")),
+            ("-42", Decimal("-42")),
+            ("-3.14", Decimal("-3.14")),
+            ("", None),
+            ("bad", None),
+            ("text", None),
+        ],
+    )
+    def test_default(self, cell: str, expected: Decimal | None):
+        table = pl.DataFrame({"name": [cell]}).lazy()
+        column = DecimalColumn(
+            name="name",
+            type="decimal",
+            property=DecimalColumnProperty(),
+        )
+
+        result = table.select(parse_decimal_column(column, pl.col("name")))
+
+        frame: pl.DataFrame = result.collect()  # ty: 
ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1", Decimal("1")), + ("1,000", Decimal("1000")), + ("1,000,000", Decimal("1000000")), + ("1,234.56", Decimal("1234.56")), + ], + ) + def test_group_char_comma(self, cell: str, expected: Decimal): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty(groupChar=","), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1,5", Decimal("1.5")), + ("3,14", Decimal("3.14")), + ], + ) + def test_decimal_char_comma(self, cell: str, expected: Decimal): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty(decimalChar=","), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1.234,56", Decimal("1234.56")), + ("1.000,00", Decimal("1000.00")), + ], + ) + def test_group_char_dot_decimal_char_comma(self, cell: str, expected: Decimal): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty( + format="decimal", groupChar=".", decimalChar="," + ), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert 
frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("$1.5", Decimal("1.5")), + ("1.5%", Decimal("1.5")), + ("\u20ac1000", Decimal("1000")), + ("1000\u20ac", Decimal("1000")), + ], + ) + def test_with_text(self, cell: str, expected: Decimal): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty(withText=True), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("$1,000.00", Decimal("1000.00")), + ("1,234.56$", Decimal("1234.56")), + ], + ) + def test_with_text_and_group_char(self, cell: str, expected: Decimal): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty( + format="decimal", withText=True, groupChar="," + ), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("\u20ac 1.000,00", Decimal("1000.00")), + ("1.000,00 \u20ac", Decimal("1000.00")), + ("1.234,56 \u20ac", Decimal("1234.56")), + ], + ) + def test_with_text_group_char_dot_decimal_char_comma( + self, cell: str, expected: Decimal + ): + table = pl.DataFrame({"name": [cell]}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty( + format="decimal", + withText=True, + groupChar=".", + decimalChar=",", + ), + ) + + result = table.select(parse_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: 
ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyDecimalColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (Decimal("1.0"), "1.0"), + (Decimal("2.0"), "2.0"), + (Decimal("1000.0"), "1000.0"), + (Decimal("3.14"), "3.14"), + (Decimal("42.5"), "42.5"), + (Decimal("-1.0"), "-1.0"), + (Decimal("-100.5"), "-100.5"), + (Decimal("0.0"), "0.0"), + ], + ) + def test_default(self, value: Decimal, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Decimal)}).lazy() + column = DecimalColumn( + name="name", + type="decimal", + property=DecimalColumnProperty(), + ) + + result = table.select(stringify_decimal_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/duration.py b/table/fairspec_table/actions/column/types/duration.py new file mode 100644 index 0000000..576d3f7 --- /dev/null +++ b/table/fairspec_table/actions/column/types/duration.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import isodate +import polars as pl + +from fairspec_metadata import CellTypeError, ColumnType, DurationColumn +from fairspec_metadata import CellError + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_duration_column(column: DurationColumn, table: Table) -> list[CellError]: + errors: list[CellError] = [] + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = 
isodate.parse_duration(row["source"]) + except Exception: + pass + + if target is None: + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + + return errors diff --git a/table/fairspec_table/actions/column/types/duration_spec.py b/table/fairspec_table/actions/column/types/duration_spec.py new file mode 100644 index 0000000..ca114a0 --- /dev/null +++ b/table/fairspec_table/actions/column/types/duration_spec.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import DurationColumn, DurationColumnProperty + +from .duration import inspect_duration_column + + +COLUMN = DurationColumn( + name="name", + type="duration", + property=DurationColumnProperty(), +) + + +class TestInspectDurationColumn: + @pytest.mark.parametrize( + "cell, valid", + [ + ("P23DT23H", True), + ("P1Y2M3DT4H5M6S", True), + ("PT30M", True), + ("P1D", True), + ("PT1H", True), + ("P1W", True), + ("PT0S", True), + ("ghijkl", False), + ("0x1234", False), + ("hello world", False), + ], + ) + def test_inspect(self, cell: str, valid: bool): + table = pl.DataFrame({"name": [cell]}).lazy() + errors = inspect_duration_column(COLUMN, table) + assert (len(errors) == 0) == valid diff --git a/table/fairspec_table/actions/column/types/email.py b/table/fairspec_table/actions/column/types/email.py new file mode 100644 index 0000000..2cb96aa --- /dev/null +++ b/table/fairspec_table/actions/column/types/email.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import EmailColumn + +from fairspec_table.settings import RFC5322_EMAIL_REGEX + + +def parse_email_column(column: EmailColumn, column_expr: pl.Expr) -> pl.Expr: + return ( + pl.when(column_expr.str.contains(RFC5322_EMAIL_REGEX)) + .then(column_expr) + .otherwise(pl.lit(None)) + .alias(column.name) + ) diff 
--git a/table/fairspec_table/actions/column/types/email_spec.py b/table/fairspec_table/actions/column/types/email_spec.py new file mode 100644 index 0000000..31b3b91 --- /dev/null +++ b/table/fairspec_table/actions/column/types/email_spec.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import EmailColumn, EmailColumnProperty + +from .email import parse_email_column + + +COLUMN = EmailColumn( + name="name", + type="email", + property=EmailColumnProperty(), +) + + +class TestParseEmailColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("user@example.com", "user@example.com"), + ("test.name@domain.org", "test.name@domain.org"), + ("user+tag@example.co.uk", "user+tag@example.co.uk"), + ("", None), + ("invalid", None), + ("@example.com", None), + ("user@", None), + ("user example.com", None), + ], + ) + def test_parse(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + result = table.select(parse_email_column(COLUMN, pl.col("name"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/geojson.py b/table/fairspec_table/actions/column/types/geojson.py new file mode 100644 index 0000000..e58151e --- /dev/null +++ b/table/fairspec_table/actions/column/types/geojson.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import json +from importlib.resources import files + +from fairspec_metadata import CellTypeError, ColumnType, GeojsonColumn, inspect_json + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_geojson_column(column: GeojsonColumn, table: Table) -> list[CellTypeError]: + errors: list[CellTypeError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] 
https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + type_json_schema = _load_geojson_schema() + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = json.loads(row["source"]) + except (json.JSONDecodeError, TypeError): + pass + + if target is None or not isinstance(target, dict): + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + continue + + format_errors = inspect_json(target, json_schema=type_json_schema) + if format_errors: + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + + return errors + + +def _load_geojson_schema() -> dict[str, object]: + schema_file = files("fairspec_table.schemas").joinpath("geojson.json") + return json.loads(schema_file.read_text(encoding="utf-8")) diff --git a/table/fairspec_table/actions/column/types/geojson_spec.py b/table/fairspec_table/actions/column/types/geojson_spec.py new file mode 100644 index 0000000..579d864 --- /dev/null +++ b/table/fairspec_table/actions/column/types/geojson_spec.py @@ -0,0 +1,313 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import GeojsonColumn, GeojsonColumnProperty + +from .geojson import inspect_geojson_column + + +class TestInspectGeojsonColumn: + def test_valid_geojson_point(self): + table = pl.DataFrame( + { + "location": [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"Point","coordinates":[12.5,41.9]}', + '{"type":"Point","coordinates":[-73.9,40.7]}', + ], + } + ).lazy() + column = GeojsonColumn( + name="location", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = 
inspect_geojson_column(column, table) + + assert len(errors) == 0 + + def test_valid_geojson_geometries(self): + table = pl.DataFrame( + { + "geometry": [ + '{"type":"LineString","coordinates":[[0,0],[1,1]]}', + '{"type":"Polygon","coordinates":[[[0,0],[1,0],[1,1],[0,1],[0,0]]]}', + '{"type":"MultiPoint","coordinates":[[0,0],[1,1]]}', + ], + } + ).lazy() + column = GeojsonColumn( + name="geometry", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 0 + + def test_valid_geojson_feature(self): + table = pl.DataFrame( + { + "feature": [ + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{"name":"Test"}}', + '{"type":"Feature","geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]},"properties":{"id":1}}', + '{"type":"Feature","geometry":null,"properties":{}}', + ], + } + ).lazy() + column = GeojsonColumn( + name="feature", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 0 + + def test_valid_geojson_feature_collection(self): + table = pl.DataFrame( + { + "collection": [ + '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', + '{"type":"FeatureCollection","features":[]}', + ], + } + ).lazy() + column = GeojsonColumn( + name="collection", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 0 + + def test_null_values(self): + table = pl.DataFrame( + { + "location": [ + '{"type":"Point","coordinates":[0,0]}', + None, + '{"type":"Feature","geometry":null,"properties":{}}', + ], + } + ).lazy() + column = GeojsonColumn( + name="location", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 0 + + def test_json_arrays_error(self): + table 
= pl.DataFrame( + { + "data": [ + '{"type":"Point","coordinates":[0,0]}', + "[[0,0],[1,1]]", + '{"type":"Feature","geometry":null,"properties":{}}', + ], + } + ).lazy() + column = GeojsonColumn( + name="data", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].columnName == "data" + assert errors[0].columnType == "geojson" + assert errors[0].rowNumber == 2 + assert errors[0].cell == "[[0,0],[1,1]]" + + def test_invalid_json_error(self): + table = pl.DataFrame( + { + "data": [ + '{"type":"Point","coordinates":[0,0]}', + "invalid json", + "{broken}", + ], + } + ).lazy() + column = GeojsonColumn( + name="data", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + type_errors = [e for e in errors if e.type == "cell/type"] + assert len(type_errors) == 2 + assert any(e.rowNumber == 2 and e.cell == "invalid json" for e in type_errors) + assert any(e.rowNumber == 3 and e.cell == "{broken}" for e in type_errors) + + def test_empty_strings_error(self): + table = pl.DataFrame( + { + "data": [ + '{"type":"Point","coordinates":[0,0]}', + "", + '{"type":"Feature","geometry":null,"properties":{}}', + ], + } + ).lazy() + column = GeojsonColumn( + name="data", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].rowNumber == 2 + assert errors[0].cell == "" + + def test_json_primitives_error(self): + table = pl.DataFrame( + { + "data": ['"string"', "123", "true", "false", "null"], + } + ).lazy() + column = GeojsonColumn( + name="data", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 5 + assert errors[0].rowNumber == 1 + assert errors[0].cell == '"string"' + 
assert errors[1].rowNumber == 2 + assert errors[1].cell == "123" + assert errors[2].rowNumber == 3 + assert errors[2].cell == "true" + assert errors[3].rowNumber == 4 + assert errors[3].cell == "false" + assert errors[4].rowNumber == 5 + assert errors[4].cell == "null" + + def test_invalid_geojson_point_coordinates(self): + table = pl.DataFrame( + { + "location": [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"Point","coordinates":[0]}', + '{"type":"Point","coordinates":[0,0,0,0]}', + ], + } + ).lazy() + column = GeojsonColumn( + name="location", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 2 + assert any( + e.rowNumber == 2 and e.cell == '{"type":"Point","coordinates":[0]}' + for e in errors + ) + assert any( + e.rowNumber == 3 and e.cell == '{"type":"Point","coordinates":[0,0,0,0]}' + for e in errors + ) + + def test_invalid_geojson_linestring(self): + table = pl.DataFrame( + { + "line": [ + '{"type":"LineString","coordinates":[[0,0],[1,1]]}', + '{"type":"LineString","coordinates":[[0,0]]}', + '{"type":"LineString","coordinates":[0,0]}', + ], + } + ).lazy() + column = GeojsonColumn( + name="line", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 2 + assert any( + e.rowNumber == 2 and e.cell == '{"type":"LineString","coordinates":[[0,0]]}' + for e in errors + ) + assert any( + e.rowNumber == 3 and e.cell == '{"type":"LineString","coordinates":[0,0]}' + for e in errors + ) + + def test_incomplete_geojson_feature(self): + table = pl.DataFrame( + { + "feature": [ + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}', + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]}}', + '{"type":"Feature","properties":{}}', + ], + } + ).lazy() + column = GeojsonColumn( + name="feature", + type="geojson", + property=GeojsonColumnProperty(), + ) + + 
errors = inspect_geojson_column(column, table) + + assert len(errors) == 2 + assert any( + e.rowNumber == 2 + and e.cell + == '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]}}' + for e in errors + ) + assert any( + e.rowNumber == 3 and e.cell == '{"type":"Feature","properties":{}}' + for e in errors + ) + + def test_invalid_geojson_feature_collection(self): + table = pl.DataFrame( + { + "collection": [ + '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', + '{"type":"FeatureCollection"}', + ], + } + ).lazy() + column = GeojsonColumn( + name="collection", + type="geojson", + property=GeojsonColumnProperty(), + ) + + errors = inspect_geojson_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].rowNumber == 2 + assert errors[0].cell == '{"type":"FeatureCollection"}' diff --git a/table/fairspec_table/actions/column/types/hex.py b/table/fairspec_table/actions/column/types/hex.py new file mode 100644 index 0000000..85ce5fa --- /dev/null +++ b/table/fairspec_table/actions/column/types/hex.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import HexColumn + +from fairspec_table.settings import HEX_REGEX + + +def parse_hex_column(column: HexColumn, column_expr: pl.Expr) -> pl.Expr: + return ( + pl.when(column_expr.str.contains(HEX_REGEX)) + .then(column_expr) + .otherwise(pl.lit(None)) + .alias(column.name) + ) diff --git a/table/fairspec_table/actions/column/types/hex_spec.py b/table/fairspec_table/actions/column/types/hex_spec.py new file mode 100644 index 0000000..d36f760 --- /dev/null +++ b/table/fairspec_table/actions/column/types/hex_spec.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import HexColumn, HexColumnProperty + +from .hex import parse_hex_column + + +COLUMN = HexColumn( + name="name", 
+ type="hex", + property=HexColumnProperty(), +) + + +class TestParseHexColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("0123456789abcdef", "0123456789abcdef"), + ("ABCDEF", "ABCDEF"), + ("deadbeef", "deadbeef"), + ("ghijkl", None), + ("0x1234", None), + ("hello world", None), + ], + ) + def test_parse(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + result = table.select(parse_hex_column(COLUMN, pl.col("name"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/integer.py b/table/fairspec_table/actions/column/types/integer.py new file mode 100644 index 0000000..1da84f0 --- /dev/null +++ b/table/fairspec_table/actions/column/types/integer.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import re + +import polars as pl + +from fairspec_metadata import IntegerColumn + + +def parse_integer_column(column: IntegerColumn, column_expr: pl.Expr) -> pl.Expr: + group_char = column.property.groupChar + with_text = column.property.withText + + if with_text: + column_expr = column_expr.str.replace_all(r"^[^\d\-]+", "") + column_expr = column_expr.str.replace_all(r"[^\d\-]+$", "") + + if group_char: + escaped_group_char = re.escape(group_char) + column_expr = column_expr.str.replace_all(escaped_group_char, "") + + column_expr = column_expr.cast(pl.Int64, strict=False) + return column_expr + + +def stringify_integer_column(_column: IntegerColumn, column_expr: pl.Expr) -> pl.Expr: + column_expr = column_expr.cast(pl.String) + return column_expr diff --git a/table/fairspec_table/actions/column/types/integer_spec.py b/table/fairspec_table/actions/column/types/integer_spec.py new file mode 100644 index 0000000..1a55112 --- /dev/null +++ b/table/fairspec_table/actions/column/types/integer_spec.py @@ -0,0 +1,160 @@ +from __future__ import annotations 
+ +import polars as pl +import pytest + +from fairspec_metadata import IntegerColumn, IntegerColumnProperty + +from .integer import parse_integer_column, stringify_integer_column + + +class TestParseIntegerColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("1", 1), + ("2", 2), + ("1000", 1000), + ("0", 0), + ("00", 0), + ("01", 1), + ("000835", 835), + ("", None), + ("2.1", None), + ("bad", None), + ("0.0003", None), + ("3.14", None), + ("1/2", None), + ], + ) + def test_default(self, cell: str, expected: int | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(), + ) + + result = table.select(parse_integer_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1", 1), + ("1,000", 1000), + ("1,000,000", 1000000), + ("000,001", 1), + ], + ) + def test_group_char_comma(self, cell: str, expected: int): + table = pl.DataFrame({"name": [cell]}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(groupChar=","), + ) + + result = table.select(parse_integer_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, group_char, expected", + [ + ("1 000", " ", 1000), + ("1'000'000", "'", 1000000), + ("1.000.000", ".", 1000000), + ], + ) + def test_group_char_other(self, cell: str, group_char: str, expected: int): + table = pl.DataFrame({"name": [cell]}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(groupChar=group_char), + ) + + result = table.select(parse_integer_column(column, 
pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1", 1), + ("1000", 1000), + ("$1000", 1000), + ("1000$", 1000), + ("\u20ac1000", 1000), + ("1000\u20ac", 1000), + ("-12\u20ac", -12), + ("\u20ac-12", -12), + ("1,000", None), + ], + ) + def test_with_text(self, cell: str, expected: int | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(withText=True), + ) + + result = table.select(parse_integer_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("$1,000,000", 1000000), + ("1,000,000$", 1000000), + ], + ) + def test_with_text_and_group_char(self, cell: str, expected: int): + table = pl.DataFrame({"name": [cell]}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(groupChar=",", withText=True), + ) + + result = table.select(parse_integer_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyIntegerColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (1, "1"), + (2, "2"), + (1000, "1000"), + (42, "42"), + (-1, "-1"), + (-100, "-100"), + (0, "0"), + (1234567890, "1234567890"), + (-1234567890, "-1234567890"), + ], + ) + def test_default(self, value: int, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Int64)}).lazy() + column = IntegerColumn( + name="name", + type="integer", + property=IntegerColumnProperty(), + ) + + result = 
table.select(stringify_integer_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/list.py b/table/fairspec_table/actions/column/types/list.py new file mode 100644 index 0000000..f334e58 --- /dev/null +++ b/table/fairspec_table/actions/column/types/list.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import ListColumn + + +def parse_list_column(column: ListColumn, column_expr: pl.Expr) -> pl.Expr: + delimiter = column.property.delimiter or "," + item_type = column.property.itemType + + dtype = _get_item_dtype(item_type) + + column_expr = column_expr.str.split(delimiter) + + if dtype != pl.String: + column_expr = column_expr.cast(pl.List(dtype), strict=False) + + return column_expr + + +def stringify_list_column(column: ListColumn, column_expr: pl.Expr) -> pl.Expr: + delimiter = column.property.delimiter or "," + + return column_expr.cast(pl.List(pl.String)).list.join(delimiter, ignore_nulls=True) + + +ITEM_TYPE_MAP: dict[str, type[pl.DataType]] = { + "integer": pl.Int64, + "number": pl.Float64, + "boolean": pl.Boolean, + "date-time": pl.Datetime, + "date": pl.Date, + "time": pl.Time, +} + + +def _get_item_dtype(item_type: str | None) -> type[pl.DataType]: + if item_type and item_type in ITEM_TYPE_MAP: + return ITEM_TYPE_MAP[item_type] + return pl.String diff --git a/table/fairspec_table/actions/column/types/list_spec.py b/table/fairspec_table/actions/column/types/list_spec.py new file mode 100644 index 0000000..312f6c1 --- /dev/null +++ b/table/fairspec_table/actions/column/types/list_spec.py @@ -0,0 +1,229 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import ListColumn, ListColumnProperty + + +from .list import parse_list_column, stringify_list_column 
+ + +class TestParseListColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("a,b,c", ["a", "b", "c"]), + ("1,2,3", ["1", "2", "3"]), + ("foo,bar,baz", ["foo", "bar", "baz"]), + ("single", ["single"]), + ("a,,c", ["a", "", "c"]), + (",b,", ["", "b", ""]), + (",,,", ["", "", "", ""]), + ], + ) + def test_default(self, cell: str, expected: list[str]): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(), + ) + + result = table.select(parse_list_column(column, pl.col("name")).alias("name")) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1,2,3", [1, 2, 3]), + ("0,-1,42", [0, -1, 42]), + ("-10,0,10", [-10, 0, 10]), + ("42", [42]), + ("1,,3", [1, None, 3]), + (",2,", [None, 2, None]), + ("1,a,3", [1, None, 3]), + ("1.5,2,3", [None, 2, 3]), + ], + ) + def test_items_integer(self, cell: str, expected: list[int | None]): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(itemType="integer"), # ty: ignore[invalid-argument-type] https://github.com/astral-sh/ty/issues/2403 + ) + + result = table.select(parse_list_column(column, pl.col("name")).alias("name")) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1.5,2.1,3.7", [1.5, 2.1, 3.7]), + ("0,-1.1,42", [0.0, -1.1, 42.0]), + ("-10.5,0,10", [-10.5, 0.0, 10.0]), + ("3.14", [3.14]), + ("1.1,,3.3", [1.1, None, 3.3]), + (",2.2,", [None, 2.2, None]), + ("1.1,a,3.3", [1.1, None, 3.3]), + ], + ) + def test_items_number(self, cell: str, 
expected: list[float | None]): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(itemType="number"), # ty: ignore[invalid-argument-type] https://github.com/astral-sh/ty/issues/2403 + ) + + result = table.select(parse_list_column(column, pl.col("name")).alias("name")) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "cell, expected", + [ + ("a;b;c", ["a", "b", "c"]), + ("1;2;3", ["1", "2", "3"]), + ("single", ["single"]), + ("a;;c", ["a", "", "c"]), + ], + ) + def test_delimiter_semicolon(self, cell: str, expected: list[str]): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(delimiter=";"), + ) + + result = table.select(parse_list_column(column, pl.col("name")).alias("name")) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + +class TestStringifyListColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (["a", "b", "c"], "a,b,c"), + (["foo", "bar", "baz"], "foo,bar,baz"), + (["1", "2", "3"], "1,2,3"), + (["single"], "single"), + (["a", "", "c"], "a,,c"), + (["", "b", ""], ",b,"), + (["", "", "", ""], ",,,"), + ([None, "b", None], "b"), + (["a", None, "c"], "a,c"), + ([], ""), + ], + ) + def test_default(self, value: list[str | None], expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.List(pl.String))]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(), + ) + + result = table.select( + stringify_list_column(column, pl.col("name")).alias("name") + ) + frame: pl.DataFrame = result.collect() # 
ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "value, expected", + [ + ([1, 2, 3], "1,2,3"), + ([0, -1, 42], "0,-1,42"), + ([-10, 0, 10], "-10,0,10"), + ([42], "42"), + ([1, None, 3], "1,3"), + ([None, 2, None], "2"), + ([], ""), + ], + ) + def test_items_integer(self, value: list[int | None], expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.List(pl.Int16))]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(itemType="integer"), # ty: ignore[invalid-argument-type] https://github.com/astral-sh/ty/issues/2403 + ) + + result = table.select( + stringify_list_column(column, pl.col("name")).alias("name") + ) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "value, expected", + [ + ([1.5, 2.1, 3.7], "1.5,2.1,3.7"), + ([0.0, -1.1, 42.0], "0.0,-1.1,42.0"), + ([-10.5, 0.0, 10.0], "-10.5,0.0,10.0"), + ([3.14], "3.14"), + ([1.1, None, 3.3], "1.1,3.3"), + ([None, 2.2, None], "2.2"), + ([], ""), + ], + ) + def test_items_number(self, value: list[float | None], expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.List(pl.Float64))]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(itemType="number"), # ty: ignore[invalid-argument-type] https://github.com/astral-sh/ty/issues/2403 + ) + + result = table.select( + stringify_list_column(column, pl.col("name")).alias("name") + ) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected + + @pytest.mark.parametrize( + "value, expected", + [ + (["a", "b", "c"], "a;b;c"), + (["1", "2", "3"], 
"1;2;3"), + (["single"], "single"), + (["a", "", "c"], "a;;c"), + (["", "b", ""], ";b;"), + ([], ""), + ], + ) + def test_delimiter_semicolon(self, value: list[str], expected: str): + table = pl.DataFrame([pl.Series("name", [value], pl.List(pl.String))]).lazy() + column = ListColumn( + name="name", + type="list", + property=ListColumnProperty(delimiter=";"), + ) + + result = table.select( + stringify_list_column(column, pl.col("name")).alias("name") + ) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + actual = frame.to_dicts()[0]["name"] + + assert actual == expected diff --git a/table/fairspec_table/actions/column/types/number.py b/table/fairspec_table/actions/column/types/number.py new file mode 100644 index 0000000..8fef693 --- /dev/null +++ b/table/fairspec_table/actions/column/types/number.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import re + +import polars as pl + +from fairspec_metadata import NumberColumn + + +def parse_number_column(column: NumberColumn, column_expr: pl.Expr) -> pl.Expr: + decimal_char = column.property.decimalChar or "." + group_char = column.property.groupChar or "" + with_text = column.property.withText + + if group_char == "." 
and decimal_char == ",": + column_expr = column_expr.str.replace_all(",", "###DECIMAL###") + column_expr = column_expr.str.replace_all(r"\.", "") + column_expr = column_expr.str.replace_all("###DECIMAL###", ".") + else: + if group_char: + escaped_group_char = re.escape(group_char) + column_expr = column_expr.str.replace_all(escaped_group_char, "") + + if decimal_char and decimal_char != ".": + column_expr = column_expr.str.replace_all(re.escape(decimal_char), ".") + + if with_text: + column_expr = column_expr.str.replace_all(r"[^\d\-.e]", "") + + column_expr = column_expr.cast(pl.Float64, strict=False) + return column_expr + + +def stringify_number_column(_column: NumberColumn, column_expr: pl.Expr) -> pl.Expr: + column_expr = column_expr.cast(pl.String) + return column_expr diff --git a/table/fairspec_table/actions/column/types/number_spec.py b/table/fairspec_table/actions/column/types/number_spec.py new file mode 100644 index 0000000..a6ed4d2 --- /dev/null +++ b/table/fairspec_table/actions/column/types/number_spec.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import NumberColumn, NumberColumnProperty + +from .number import parse_number_column, stringify_number_column + + +class TestParseNumberColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("1", 1.0), + ("2", 2.0), + ("1000", 1000.0), + ("1.5", 1.5), + ("4.14159", 4.14159), + ("-42", -42.0), + ("-3.14", -3.14), + ("", None), + ("bad", None), + ("text", None), + ], + ) + def test_default(self, cell: str, expected: float | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + 
@pytest.mark.parametrize( + "cell, expected", + [ + ("1", 1.0), + ("1,000", 1000.0), + ("1,000,000", 1000000.0), + ("1,234.56", 1234.56), + ], + ) + def test_group_char_comma(self, cell: str, expected: float): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(groupChar=","), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1,5", 1.5), + ("3,14", 3.14), + ], + ) + def test_decimal_char_comma(self, cell: str, expected: float): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(decimalChar=","), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("1.234,56", 1234.56), + ("1.000,00", 1000.0), + ], + ) + def test_group_char_dot_decimal_char_comma(self, cell: str, expected: float): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(groupChar=".", decimalChar=","), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("$1.5", 1.5), + ("1.5%", 1.5), + ("\u20ac1000", 1000.0), + ("1000\u20ac", 1000.0), + ], + ) + def test_with_text(self, cell: str, expected: float): + table = 
pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(withText=True), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("$1,000.00", 1000.0), + ("1,234.56$", 1234.56), + ], + ) + def test_with_text_and_group_char(self, cell: str, expected: float): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(withText=True, groupChar=","), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, expected", + [ + ("\u20ac 1.000,00", 1000.0), + ("1.000,00 \u20ac", 1000.0), + ("1.234,56 \u20ac", 1234.56), + ], + ) + def test_with_text_group_char_dot_decimal_char_comma( + self, cell: str, expected: float + ): + table = pl.DataFrame({"name": [cell]}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty( + withText=True, groupChar=".", decimalChar="," + ), + ) + + result = table.select(parse_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyNumberColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (1.0, "1.0"), + (2.0, "2.0"), + (1000.0, "1000.0"), + (3.14, "3.14"), + (42.5, "42.5"), + (-1.0, "-1.0"), + (-100.5, "-100.5"), + (0.0, "0.0"), + (-123.456789, "-123.456789"), + (1234567890.123, "1234567890.123"), + 
(-9876543210.987, "-9876543210.987"), + (0.001, "0.001"), + (-0.0001, "-0.0001"), + ], + ) + def test_default(self, value: float, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Float64)}).lazy() + column = NumberColumn( + name="name", + type="number", + property=NumberColumnProperty(), + ) + + result = table.select(stringify_number_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/object.py b/table/fairspec_table/actions/column/types/object.py new file mode 100644 index 0000000..af21816 --- /dev/null +++ b/table/fairspec_table/actions/column/types/object.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import json + +from fairspec_metadata import ( + CellJsonError, + CellTypeError, + ColumnType, + ObjectColumn, + inspect_json, +) + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_object_column( + column: ObjectColumn, table: Table +) -> list[CellTypeError | CellJsonError]: + errors: list[CellTypeError | CellJsonError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + constraint_json_schema = column.property.model_dump( + exclude_none=True, by_alias=True + ) + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = json.loads(row["source"]) + except (json.JSONDecodeError, TypeError): + pass + + if target is None or not isinstance(target, dict): + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + 
columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + continue + + if constraint_json_schema: + constraint_errors = inspect_json(target, json_schema=constraint_json_schema) + for error in constraint_errors: + errors.append( + CellJsonError( + type="cell/json", + cell=str(row["source"]), + columnName=column.name, + rowNumber=row[NUMBER_COLUMN_NAME], + message=error["message"], + jsonPointer=error["jsonPointer"], + ) + ) + + return errors diff --git a/table/fairspec_table/actions/column/types/object_spec.py b/table/fairspec_table/actions/column/types/object_spec.py new file mode 100644 index 0000000..f552c1f --- /dev/null +++ b/table/fairspec_table/actions/column/types/object_spec.py @@ -0,0 +1,301 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import ObjectColumn, ObjectColumnProperty +from fairspec_metadata import CellJsonError, CellTypeError + +from .object import inspect_object_column + + +class TestInspectObjectColumn: + def test_valid_json_objects(self): + table = pl.DataFrame( + { + "metadata": [ + '{"key":"value"}', + '{"num":123}', + '{"arr":[1,2,3]}', + ], + } + ).lazy() + column = ObjectColumn( + name="metadata", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 0 + + def test_json_arrays_error(self): + table = pl.DataFrame( + { + "data": ["[1,2,3]", '{"key":"value"}', '["a","b","c"]'], + } + ).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 2 + assert isinstance(errors[0], CellTypeError) + assert errors[0].columnName == "data" + assert errors[0].columnType == "object" + assert errors[0].rowNumber == 1 + assert errors[0].cell == "[1,2,3]" + assert errors[1].rowNumber == 3 + assert errors[1].cell == '["a","b","c"]' + + def test_null_values(self): + table = pl.DataFrame( + { + 
"config": ['{"key":"value"}', None, '{"num":123}'], + } + ).lazy() + column = ObjectColumn( + name="config", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 0 + + def test_invalid_json_error(self): + table = pl.DataFrame( + { + "data": [ + '{"valid":true}', + "invalid json", + '{"key":"value"}', + "{broken}", + ], + } + ).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + type_errors = [e for e in errors if e.type == "cell/type"] + assert len(type_errors) == 2 + assert any(e.rowNumber == 2 and e.cell == "invalid json" for e in type_errors) + assert any(e.rowNumber == 4 and e.cell == "{broken}" for e in type_errors) + + def test_complex_nested_json(self): + table = pl.DataFrame( + { + "complex": [ + '{"user":{"name":"John","age":30,"tags":["admin","user"]}}', + '{"nested":{"deep":{"value":true}}}', + '{"array":[{"id":1},{"id":2}]}', + ], + } + ).lazy() + column = ObjectColumn( + name="complex", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 0 + + def test_empty_strings_error(self): + table = pl.DataFrame( + { + "data": ['{"valid":true}', "", '{"key":"value"}'], + } + ).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 1 + assert errors[0].type == "cell/type" + assert errors[0].rowNumber == 2 + assert errors[0].cell == "" + + def test_json_primitives_error(self): + table = pl.DataFrame( + { + "data": ['"string"', "123", "true", "false", "null"], + } + ).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty(), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 5 + assert errors[0].rowNumber == 1 + 
assert errors[0].cell == '"string"' + assert errors[1].rowNumber == 2 + assert errors[1].cell == "123" + assert errors[2].rowNumber == 3 + assert errors[2].cell == "true" + assert errors[3].rowNumber == 4 + assert errors[3].cell == "false" + assert errors[4].rowNumber == 5 + assert errors[4].cell == "null" + + def test_valid_objects_matching_json_schema(self): + table = pl.DataFrame( + { + "user": [ + '{"name":"John","age":30}', + '{"name":"Jane","age":25}', + '{"name":"Bob","age":35}', + ], + } + ).lazy() + column = ObjectColumn( + name="user", + type="object", + property=ObjectColumnProperty( + properties={ + "name": {"type": "string"}, + "age": {"type": "number"}, + }, + required=["name", "age"], + ), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 0 + + def test_objects_not_matching_json_schema(self): + table = pl.DataFrame( + { + "user": [ + '{"name":"John","age":30}', + '{"name":"Jane"}', + '{"age":25}', + '{"name":"Bob","age":"invalid"}', + ], + } + ).lazy() + column = ObjectColumn( + name="user", + type="object", + property=ObjectColumnProperty( + properties={ + "name": {"type": "string"}, + "age": {"type": "number"}, + }, + required=["name", "age"], + ), + ) + + errors = inspect_object_column(column, table) + + json_errors = [e for e in errors if isinstance(e, CellJsonError)] + assert len(json_errors) == 3 + assert json_errors[0].rowNumber == 2 + assert json_errors[0].cell == '{"name":"Jane"}' + assert json_errors[0].message == "'age' is a required property" + assert json_errors[0].jsonPointer == "/" + assert json_errors[1].rowNumber == 3 + assert json_errors[1].cell == '{"age":25}' + assert json_errors[1].message == "'name' is a required property" + assert json_errors[1].jsonPointer == "/" + assert json_errors[2].rowNumber == 4 + assert json_errors[2].cell == '{"name":"Bob","age":"invalid"}' + assert json_errors[2].message == "'invalid' is not of type 'number'" + assert json_errors[2].jsonPointer == "/age" + + def 
test_complex_json_schema_with_nested_objects(self): + table = pl.DataFrame( + { + "config": [ + '{"database":{"host":"localhost","port":5432},"cache":{"enabled":true}}', + '{"database":{"host":"localhost","port":"invalid"},"cache":{"enabled":true}}', + ], + } + ).lazy() + column = ObjectColumn( + name="config", + type="object", + property=ObjectColumnProperty( + properties={ + "database": { + "type": "object", + "properties": { + "host": {"type": "string"}, + "port": {"type": "number"}, + }, + "required": ["host", "port"], + }, + "cache": { + "type": "object", + "properties": { + "enabled": {"type": "boolean"}, + }, + "required": ["enabled"], + }, + }, + required=["database", "cache"], + ), + ) + + errors = inspect_object_column(column, table) + + assert len(errors) == 1 + assert isinstance(errors[0], CellJsonError) + assert errors[0].rowNumber == 2 + assert errors[0].message == "'invalid' is not of type 'number'" + assert errors[0].jsonPointer == "/database/port" + + def test_json_schema_with_array_properties(self): + table = pl.DataFrame( + { + "data": [ + '{"items":[1,2,3],"name":"test"}', + '{"items":["not","numbers"],"name":"test"}', + ], + } + ).lazy() + column = ObjectColumn( + name="data", + type="object", + property=ObjectColumnProperty( + properties={ + "items": { + "type": "array", + "items": {"type": "number"}, + }, + "name": {"type": "string"}, + }, + required=["items", "name"], + ), + ) + + errors = inspect_object_column(column, table) + + json_errors = [e for e in errors if isinstance(e, CellJsonError)] + assert len(json_errors) == 2 + assert json_errors[0].rowNumber == 2 + assert json_errors[0].message == "'not' is not of type 'number'" + assert json_errors[0].jsonPointer == "/items/0" + assert json_errors[1].rowNumber == 2 + assert json_errors[1].message == "'numbers' is not of type 'number'" + assert json_errors[1].jsonPointer == "/items/1" diff --git a/table/fairspec_table/actions/column/types/time.py 
b/table/fairspec_table/actions/column/types/time.py new file mode 100644 index 0000000..fb57309 --- /dev/null +++ b/table/fairspec_table/actions/column/types/time.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import TimeColumn + +DEFAULT_FORMAT = "%H:%M:%S" + + +def parse_time_column(column: TimeColumn, column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return ( + pl.concat_str([pl.lit("1970-01-01T"), column_expr]) + .str.strptime(pl.Datetime, f"%Y-%m-%dT{fmt}", strict=False) + .cast(pl.Time) + .alias(column.name) + ) + + +def stringify_time_column(column: TimeColumn, column_expr: pl.Expr) -> pl.Expr: + fmt = column.property.temporalFormat or DEFAULT_FORMAT + return column_expr.dt.strftime(fmt) diff --git a/table/fairspec_table/actions/column/types/time_spec.py b/table/fairspec_table/actions/column/types/time_spec.py new file mode 100644 index 0000000..d4b73bb --- /dev/null +++ b/table/fairspec_table/actions/column/types/time_spec.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import datetime + +import polars as pl +import pytest + +from fairspec_metadata import TimeColumn, TimeColumnProperty + +from .time import parse_time_column, stringify_time_column + + +class TestParseTimeColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("06:00:00", datetime.time(6, 0, 0)), + ("09:00", None), + ("3 am", None), + ("3.00", None), + ("invalid", None), + ("", None), + ], + ) + def test_default(self, cell: str, expected: datetime.time | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = TimeColumn( + name="name", + type="time", + property=TimeColumnProperty(), + ) + + result = table.select(parse_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "cell, 
expected", + [ + ("06:00", datetime.time(6, 0, 0)), + ("06:50", datetime.time(6, 50, 0)), + ("invalid", None), + ("", None), + ], + ) + def test_temporal_format_hm(self, cell: str, expected: datetime.time | None): + table = pl.DataFrame({"name": [cell]}).lazy() + column = TimeColumn( + name="name", + type="time", + property=TimeColumnProperty(temporalFormat="%H:%M"), + ) + + result = table.select(parse_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + +class TestStringifyTimeColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (datetime.time(6, 0, 0), "06:00:00"), + (datetime.time(16, 30, 0), "16:30:00"), + ], + ) + def test_default(self, value: datetime.time, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Time)}).lazy() + column = TimeColumn( + name="name", + type="time", + property=TimeColumnProperty(), + ) + + result = table.select(stringify_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] + + @pytest.mark.parametrize( + "value, expected", + [ + (datetime.time(6, 0, 0), "06:00"), + (datetime.time(16, 30, 0), "16:30"), + ], + ) + def test_temporal_format_hm(self, value: datetime.time, expected: str): + table = pl.DataFrame({"name": pl.Series([value], dtype=pl.Time)}).lazy() + column = TimeColumn( + name="name", + type="time", + property=TimeColumnProperty(temporalFormat="%H:%M"), + ) + + result = table.select(stringify_time_column(column, pl.col("name"))) + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/topojson.py 
b/table/fairspec_table/actions/column/types/topojson.py new file mode 100644 index 0000000..c22775d --- /dev/null +++ b/table/fairspec_table/actions/column/types/topojson.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import json +from importlib.resources import files + +from fairspec_metadata import CellTypeError, ColumnType, TopojsonColumn, inspect_json + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_topojson_column( + column: TopojsonColumn, table: Table +) -> list[CellTypeError]: + errors: list[CellTypeError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + type_json_schema = _load_topojson_schema() + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = json.loads(row["source"]) + except (json.JSONDecodeError, TypeError): + pass + + if target is None or not isinstance(target, dict): + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + continue + + format_errors = inspect_json(target, json_schema=type_json_schema) + if format_errors: + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + + return errors + + +def _load_topojson_schema() -> dict[str, object]: + schema_file = files("fairspec_table.schemas").joinpath("topojson.json") + return json.loads(schema_file.read_text(encoding="utf-8")) diff --git a/table/fairspec_table/actions/column/types/topojson_spec.py b/table/fairspec_table/actions/column/types/topojson_spec.py new file 
mode 100644 index 0000000..b61c3c8 --- /dev/null +++ b/table/fairspec_table/actions/column/types/topojson_spec.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import TopojsonColumn, TopojsonColumnProperty + +from .topojson import inspect_topojson_column + + +class TestInspectTopojsonColumn: + def test_valid_topojson(self): + table = pl.DataFrame( + { + "topology": [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0,0]}]}},"arcs":[]}', + '{"type":"Topology","objects":{"collection":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + ], + } + ).lazy() + column = TopojsonColumn( + name="topology", + type="topojson", + property=TopojsonColumnProperty(), + ) + + errors = inspect_topojson_column(column, table) + + assert len(errors) == 0 + + def test_invalid_topojson_structure(self): + table = pl.DataFrame( + { + "topology": [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + '{"type":"Topology","objects":{}}', + '{"type":"Topology"}', + ], + } + ).lazy() + column = TopojsonColumn( + name="topology", + type="topojson", + property=TopojsonColumnProperty(), + ) + + errors = inspect_topojson_column(column, table) + + assert len(errors) == 2 + assert any( + e.rowNumber == 2 and e.cell == '{"type":"Topology","objects":{}}' + for e in errors + ) + assert any(e.rowNumber == 3 and e.cell == '{"type":"Topology"}' for e in errors) + + def test_topojson_geometry_objects(self): + table = pl.DataFrame( + { + "geometry": [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"LineString","arcs":[0,1]}', + '{"type":"Polygon","arcs":[[0,1,2]]}', + ], + } + ).lazy() + column = TopojsonColumn( + name="geometry", + type="topojson", + property=TopojsonColumnProperty(), + ) + + errors = inspect_topojson_column(column, table) + + assert len(errors) == 0 + + def test_null_values(self): + table = 
pl.DataFrame( + { + "topology": [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + None, + ], + } + ).lazy() + column = TopojsonColumn( + name="topology", + type="topojson", + property=TopojsonColumnProperty(), + ) + + errors = inspect_topojson_column(column, table) + + assert len(errors) == 0 diff --git a/table/fairspec_table/actions/column/types/unknown.py b/table/fairspec_table/actions/column/types/unknown.py new file mode 100644 index 0000000..22dd788 --- /dev/null +++ b/table/fairspec_table/actions/column/types/unknown.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import UnknownColumn + + +def stringify_unknown_column(_column: UnknownColumn, column_expr: pl.Expr) -> pl.Expr: + return column_expr.cast(pl.String) diff --git a/table/fairspec_table/actions/column/types/unknown_spec.py b/table/fairspec_table/actions/column/types/unknown_spec.py new file mode 100644 index 0000000..87393df --- /dev/null +++ b/table/fairspec_table/actions/column/types/unknown_spec.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import UnknownColumn, UnknownColumnProperty + +from .unknown import stringify_unknown_column + + +COLUMN = UnknownColumn( + name="name", + type="unknown", + property=UnknownColumnProperty(), +) + + +class TestStringifyUnknownColumn: + @pytest.mark.parametrize( + "value, expected", + [ + (1.0, "1.0"), + (3.14, "3.14"), + (True, "true"), + (False, "false"), + ("text", "text"), + ], + ) + def test_stringify(self, value: object, expected: str): + table = pl.DataFrame({"name": [value]}).lazy() + result = table.select(stringify_unknown_column(COLUMN, pl.col("name"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git 
a/table/fairspec_table/actions/column/types/url.py b/table/fairspec_table/actions/column/types/url.py new file mode 100644 index 0000000..379162f --- /dev/null +++ b/table/fairspec_table/actions/column/types/url.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_metadata import UrlColumn + +from fairspec_table.settings import URL_REGEX + + +def parse_url_column(column: UrlColumn, column_expr: pl.Expr) -> pl.Expr: + return ( + pl.when(column_expr.str.contains(URL_REGEX)) + .then(column_expr) + .otherwise(pl.lit(None)) + .alias(column.name) + ) diff --git a/table/fairspec_table/actions/column/types/url_spec.py b/table/fairspec_table/actions/column/types/url_spec.py new file mode 100644 index 0000000..6b82431 --- /dev/null +++ b/table/fairspec_table/actions/column/types/url_spec.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import UrlColumn, UrlColumnProperty + +from .url import parse_url_column + + +COLUMN = UrlColumn( + name="name", + type="url", + property=UrlColumnProperty(), +) + + +class TestParseUrlColumn: + @pytest.mark.parametrize( + "cell, expected", + [ + ("https://example.com", "https://example.com"), + ("http://example.com", "http://example.com"), + ("https://example.com/path?query=1", "https://example.com/path?query=1"), + ("", None), + ("example.com", None), + ("ftp://example.com", None), + ("not a url", None), + ], + ) + def test_parse(self, cell: str, expected: str | None): + table = pl.DataFrame({"name": [cell]}).lazy() + result = table.select(parse_url_column(COLUMN, pl.col("name"))) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": expected}] diff --git a/table/fairspec_table/actions/column/types/wkb.py b/table/fairspec_table/actions/column/types/wkb.py new file mode 100644 index 0000000..bcd4e73 --- /dev/null +++ 
b/table/fairspec_table/actions/column/types/wkb.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import shapely + +from fairspec_metadata import CellTypeError, ColumnType, WkbColumn + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_wkb_column(column: WkbColumn, table: Table) -> list[CellTypeError]: + errors: list[CellTypeError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + raw = bytes.fromhex(row["source"]) + target = shapely.from_wkb(raw) + except Exception: + pass + + if target is None: + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + + return errors diff --git a/table/fairspec_table/actions/column/types/wkb_spec.py b/table/fairspec_table/actions/column/types/wkb_spec.py new file mode 100644 index 0000000..924cf83 --- /dev/null +++ b/table/fairspec_table/actions/column/types/wkb_spec.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import WkbColumn, WkbColumnProperty + +from .wkb import inspect_wkb_column + + +class TestInspectWkbColumn: + @pytest.mark.parametrize( + "cell, valid", + [ + ("0101000000000000000000f03f0000000000000040", True), + ("ghijkl", False), + ("0x1234", False), + ("hello world", False), + ], + ) + def test_wkb_validation(self, cell: str, valid: bool): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = WkbColumn( + name="name", + type="wkb", + property=WkbColumnProperty(), + ) + + errors = 
inspect_wkb_column(column, table) + + assert (len(errors) == 0) == valid diff --git a/table/fairspec_table/actions/column/types/wkt.py b/table/fairspec_table/actions/column/types/wkt.py new file mode 100644 index 0000000..a0a80d5 --- /dev/null +++ b/table/fairspec_table/actions/column/types/wkt.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import shapely + +from fairspec_metadata import CellTypeError, ColumnType, WktColumn + +from fairspec_table.models.table import Table +from fairspec_table.settings import NUMBER_COLUMN_NAME + + +def inspect_wkt_column(column: WktColumn, table: Table) -> list[CellTypeError]: + errors: list[CellTypeError] = [] + + import polars as pl + + frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .select(pl.col(NUMBER_COLUMN_NAME), pl.col(column.name).alias("source")) + .collect() + ) + + for row in frame.to_dicts(): + if row["source"] is None: + continue + + target = None + try: + target = shapely.from_wkt(row["source"]) + except Exception: + pass + + if target is None: + errors.append( + CellTypeError( + type="cell/type", + cell=str(row["source"]), + columnName=column.name, + columnType=ColumnType(column.type), + rowNumber=row[NUMBER_COLUMN_NAME], + ) + ) + + return errors diff --git a/table/fairspec_table/actions/column/types/wkt_spec.py b/table/fairspec_table/actions/column/types/wkt_spec.py new file mode 100644 index 0000000..4f1a1ae --- /dev/null +++ b/table/fairspec_table/actions/column/types/wkt_spec.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import polars as pl +import pytest + +from fairspec_metadata import WktColumn, WktColumnProperty + +from .wkt import inspect_wkt_column + + +class TestInspectWktColumn: + @pytest.mark.parametrize( + "cell, valid", + [ + ("POINT (0 0)", True), + ("MULTIPOINT ((0 0), (1 1))", True), + ("ghijkl", False), + ("0x1234", False), + ("hello world", False), + ], + ) + def 
test_wkt_validation(self, cell: str, valid: bool): + table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() + column = WktColumn( + name="name", + type="wkt", + property=WktColumnProperty(), + ) + + errors = inspect_wkt_column(column, table) + + assert (len(errors) == 0) == valid diff --git a/table/fairspec_table/actions/data/__init__.py b/table/fairspec_table/actions/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/data/file_dialect.py b/table/fairspec_table/actions/data/file_dialect.py new file mode 100644 index 0000000..05c753c --- /dev/null +++ b/table/fairspec_table/actions/data/file_dialect.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from fairspec_table.helpers import get_header_rows +from fairspec_table.models import ( + DataRecord, + DataRow, + FileDialectWithHeaderAndCommentRows, +) + + +def get_records_from_rows( + rows: list[DataRow], + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> list[DataRecord]: + records: list[DataRecord] = [] + + header = _get_header_from_rows(rows, file_dialect) + content = _get_content_from_rows(rows, file_dialect) + + labels = _get_labels_from_header(header, file_dialect) + if labels is None: + return records + + for row in content: + if _get_is_commented_row(row, file_dialect): + continue + + records.append( + { + labels[index]: row[index] + for index in range(len(labels)) + if index < len(row) + } + ) + + return records + + +def _get_header_from_rows( + rows: list[DataRow], + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> list[DataRow]: + if file_dialect is not None and file_dialect.columnNames is not None: + return [list(file_dialect.columnNames)] + + header_rows = get_header_rows(file_dialect) + + if not header_rows: + length = max((len(row) for row in rows), default=0) + labels = [f"column{idx + 1}" for idx in range(length)] + return [labels] + + header: list[DataRow] = [] + for number in 
header_rows: + if number - 1 < len(rows): + header.append(rows[number - 1]) + + return header + + +def _get_content_from_rows( + rows: list[DataRow], + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> list[DataRow]: + header_rows = get_header_rows(file_dialect) + comment_rows = ( + file_dialect.commentRows + if file_dialect is not None and file_dialect.commentRows is not None + else [] + ) + skip_rows = header_rows[0] - 1 if header_rows else 0 + + content: list[DataRow] = [] + for index, row in enumerate(rows): + number = index + 1 + + if number <= skip_rows: + continue + + if number in header_rows: + continue + + if number in comment_rows: + continue + + if _get_is_commented_row(row, file_dialect): + continue + + content.append(row) + + return content + + +def _get_labels_from_header( + header: list[DataRow], + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> list[str] | None: + if not header: + return None + + labels = [str(v) for v in header[0]] + header_join = ( + file_dialect.headerJoin + if file_dialect is not None and file_dialect.headerJoin is not None + else " " + ) + + for row in header[1:]: + for index, label in enumerate(row[: len(labels)]): + prefix = labels[index] + labels[index] = header_join.join( + str(part) for part in [prefix, label] if part + ) + + return labels + + +def _get_is_commented_row( + row: list[object], + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> bool: + if file_dialect is None or file_dialect.commentPrefix is None: + return False + + if not row or not isinstance(row[0], str): + return False + + return row[0].startswith(file_dialect.commentPrefix) diff --git a/table/fairspec_table/actions/data/file_dialect_spec.py b/table/fairspec_table/actions/data/file_dialect_spec.py new file mode 100644 index 0000000..48104e7 --- /dev/null +++ b/table/fairspec_table/actions/data/file_dialect_spec.py @@ -0,0 +1,407 @@ +from __future__ import annotations + +from
fairspec_metadata import CsvFileDialect + +from .file_dialect import get_records_from_rows + + +class TestGetRecordsFromRows: + def test_convert_rows_to_records_with_default_header(self): + rows = [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_single_row_with_header(self): + rows: list[list[object]] = [["name", "age", "city"]] + + result = get_records_from_rows(rows) + + assert result == [] + + def test_empty_rows(self): + rows: list[list[object]] = [] + + result = get_records_from_rows(rows) + + assert result == [] + + def test_rows_without_header_when_header_is_false(self): + rows = [ + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=False), + ) + + assert result == [ + {"column1": "Alice", "column2": 30, "column3": "NYC"}, + {"column1": "Bob", "column2": 25, "column3": "LA"}, + ] + + def test_custom_header_rows(self): + rows = [ + ["skip this row"], + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=[2]), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_multiple_header_rows_with_default_join(self): + rows = [ + ["first", "last", "contact"], + ["name", "name", "email"], + ["Alice", "Smith", "alice@example.com"], + ["Bob", "Jones", "bob@example.com"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=[1, 2]), + ) + + assert result == [ + { + "first name": "Alice", + "last name": "Smith", + "contact email": "alice@example.com", + }, + { + "first name": "Bob", + "last name": "Jones", + "contact email": "bob@example.com", + }, + ] + + def test_multiple_header_rows_with_custom_join(self): + 
rows = [ + ["user", "user", "meta"], + ["first", "last", "created"], + ["Alice", "Smith", "2023-01-01"], + ["Bob", "Jones", "2023-01-02"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=[1, 2], headerJoin="_"), + ) + + assert result == [ + {"user_first": "Alice", "user_last": "Smith", "meta_created": "2023-01-01"}, + {"user_first": "Bob", "user_last": "Jones", "meta_created": "2023-01-02"}, + ] + + def test_skip_comment_rows_by_row_number(self): + rows = [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["# Comment row", "ignored", "data"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(commentRows=[3]), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_skip_rows_with_comment_character(self): + rows = [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["# Comment", "ignored", "data"], + ["Bob", 25, "LA"], + ["Regular row", "data", "value"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(commentPrefix="#"), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + {"name": "Regular row", "age": "data", "city": "value"}, + ] + + def test_skip_rows_with_multiple_comment_characters(self): + rows = [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + ["# Comment 1", "ignored", "data"], + ["Bob", 25, "LA"], + ["## Comment 2", "ignored", "data"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(commentPrefix="#"), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_not_skip_rows_when_first_cell_is_not_string(self): + rows = [ + ["name", "age", "city"], + ["Alice", 30, "NYC"], + [123, "data", "test"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(commentPrefix="#"), + ) + + assert result == [ + 
{"name": "Alice", "age": 30, "city": "NYC"}, + {"name": 123, "age": "data", "city": "test"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_rows_with_different_lengths(self): + rows = [ + ["name", "age", "city", "country"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA", "USA"], + ["Charlie"], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA", "country": "USA"}, + {"name": "Charlie"}, + ] + + def test_null_values(self): + rows = [ + ["name", "age", "city"], + ["Alice", None, None], + [None, 25, "LA"], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"name": "Alice", "age": None, "city": None}, + {"name": None, "age": 25, "city": "LA"}, + ] + + def test_boolean_and_number_types(self): + rows = [ + ["name", "active", "count"], + ["Alice", True, 100], + ["Bob", False, 0], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"name": "Alice", "active": True, "count": 100}, + {"name": "Bob", "active": False, "count": 0}, + ] + + def test_convert_header_values_to_strings(self): + rows = [ + [1, 2, 3], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"1": "Alice", "2": 30, "3": "NYC"}, + {"1": "Bob", "2": 25, "3": "LA"}, + ] + + def test_empty_header_cells(self): + rows = [ + ["name", "", "city"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows(rows) + + assert result == [ + {"name": "Alice", "": 30, "city": "NYC"}, + {"name": "Bob", "": 25, "city": "LA"}, + ] + + def test_multi_row_headers_with_empty_cells(self): + rows = [ + ["person", "", "location"], + ["first", "last", "city"], + ["Alice", "Smith", "NYC"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=[1, 2]), + ) + + assert result == [ + {"person first": "Alice", "last": "Smith", "location city": "NYC"}, + ] + + def 
test_combination_of_header_rows_and_comment_rows(self): + rows = [ + ["skip row 1"], + ["name", "age", "city"], + ["# Comment", "data", "data"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=[2], commentRows=[3]), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_combination_of_comment_rows_and_comment_prefix(self): + rows = [ + ["name", "age", "city"], + ["# Inline comment", "data", "data"], + ["Alice", 30, "NYC"], + ["Comment by row number", "data", "data"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(commentRows=[4], commentPrefix="#"), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_generate_column_names_based_on_longest_row_when_no_header(self): + rows = [ + ["Alice", 30], + ["Bob", 25, "LA", "USA"], + ["Charlie", 35, "SF"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(headerRows=False), + ) + + assert result == [ + {"column1": "Alice", "column2": 30}, + {"column1": "Bob", "column2": 25, "column3": "LA", "column4": "USA"}, + {"column1": "Charlie", "column2": 35, "column3": "SF"}, + ] + + def test_use_column_names_when_provided_with_header_rows_false(self): + rows = [ + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect( + format="csv", + headerRows=False, + columnNames=["name", "age", "city"], + ), + ) + + assert result == [ + {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_use_column_names_even_when_file_has_headers(self): + rows = [ + ["firstName", "years", "location"], + ["Alice", 30, "NYC"], + ["Bob", 25, "LA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect(columnNames=["name", "age", "city"]), + ) + + assert result == [ 
+ {"name": "Alice", "age": 30, "city": "NYC"}, + {"name": "Bob", "age": 25, "city": "LA"}, + ] + + def test_column_names_with_rows_longer_than_column_names_array(self): + rows = [ + ["Alice", 30, "NYC", "USA"], + ["Bob", 25, "LA", "USA"], + ] + + result = get_records_from_rows( + rows, + CsvFileDialect( + format="csv", + headerRows=False, + columnNames=["name", "age"], + ), + ) + + assert result == [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25}, + ] + + def test_column_names_with_rows_shorter_than_column_names_array(self): + rows: list[list[object]] = [["Alice", 30], ["Bob"]] + + result = get_records_from_rows( + rows, + CsvFileDialect( + format="csv", + headerRows=False, + columnNames=["name", "age", "city", "country"], + ), + ) + + assert result == [ + {"name": "Alice", "age": 30}, + {"name": "Bob"}, + ] diff --git a/table/fairspec_table/actions/table/__init__.py b/table/fairspec_table/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/table/checks/__init__.py b/table/fairspec_table/actions/table/checks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/table/checks/key.py b/table/fairspec_table/actions/table/checks/key.py new file mode 100644 index 0000000..b1d3fb1 --- /dev/null +++ b/table/fairspec_table/actions/table/checks/key.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal + +import polars as pl +from fairspec_metadata import RowPrimaryKeyError, RowUniqueKeyError + +if TYPE_CHECKING: + from fairspec_table.models import SchemaMapping + + +@dataclass +class RowKeyCheck: + is_error_expr: pl.Expr + error_template: str + + +def create_row_key_checks(mapping: SchemaMapping) -> list[RowKeyCheck]: + unique_keys = mapping.target.uniqueKeys or [] + primary_key = mapping.target.primaryKey + + checks: list[RowKeyCheck] = [] + + if primary_key: + 
checks.append(_create_row_key_check(primary_key, key_type="primary")) + + for key in unique_keys: + checks.append(_create_row_key_check(key, key_type="unique")) + + return checks + + +def _create_row_key_check( + key_columns: list[str], + *, + key_type: Literal["primary", "unique"], +) -> RowKeyCheck: + is_error_expr = ( + pl.concat_list(key_columns).is_first_distinct().not_() + & pl.concat_list(key_columns).list.min().is_not_null() + ) + + error_model: RowPrimaryKeyError | RowUniqueKeyError + if key_type == "primary": + error_model = RowPrimaryKeyError( + type="row/primaryKey", columnNames=key_columns, rowNumber=0 + ) + else: + error_model = RowUniqueKeyError( + type="row/uniqueKey", columnNames=key_columns, rowNumber=0 + ) + + return RowKeyCheck( + is_error_expr=is_error_expr, + error_template=error_model.model_dump_json(), + ) diff --git a/table/fairspec_table/actions/table/checks/key_spec.py b/table/fairspec_table/actions/table/checks/key_spec.py new file mode 100644 index 0000000..3e5218e --- /dev/null +++ b/table/fairspec_table/actions/table/checks/key_spec.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import RowPrimaryKeyError, RowUniqueKeyError +from fairspec_metadata import TableSchema + +from fairspec_table.actions.table.inspect import inspect_table + + +class TestInspectTableRowUnique: + def test_should_not_error_when_all_rows_are_unique_for_primary_key(self): + table = pl.DataFrame( + { + "id": [1, 2, 3, 4, 5], + "name": ["Alice", "Bob", "Charlie", "David", "Eve"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + }, + primaryKey=["id"], + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 0 + + def test_should_error_for_duplicate_primary_key_rows(self): + table = 
pl.DataFrame( + { + "id": [1, 2, 3, 2, 5], + "name": ["Alice", "Bob", "Charlie", "Bob2", "Eve"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + }, + primaryKey=["id"], + ) + + errors = inspect_table(table, table_schema=table_schema) + + pk_errors = [e for e in errors if isinstance(e, RowPrimaryKeyError)] + assert len(pk_errors) == 1 + assert pk_errors[0].rowNumber == 4 + assert pk_errors[0].columnNames == ["id"] + + def test_should_not_error_when_all_rows_are_unique_for_unique_key(self): + table = pl.DataFrame( + { + "id": [1, 2, 3, 4, 5], + "email": [ + "a@test.com", + "b@test.com", + "c@test.com", + "d@test.com", + "e@test.com", + ], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "email": StringColumnProperty(), + }, + uniqueKeys=[["email"]], + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 0 + + def test_should_error_for_duplicate_unique_key_rows(self): + table = pl.DataFrame( + { + "id": [1, 2, 3, 4, 5], + "email": [ + "a@test.com", + "b@test.com", + "a@test.com", + "d@test.com", + "b@test.com", + ], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "email": StringColumnProperty(), + }, + uniqueKeys=[["email"]], + ) + + errors = inspect_table(table, table_schema=table_schema) + + uk_errors = [e for e in errors if isinstance(e, RowUniqueKeyError)] + assert len(uk_errors) == 2 + assert uk_errors[0].rowNumber == 3 + assert uk_errors[0].columnNames == ["email"] + assert uk_errors[1].rowNumber == 5 + assert uk_errors[1].columnNames == ["email"] + + def test_should_handle_composite_unique_keys(self): + table = pl.DataFrame( + { + "category": ["A", "A", "B", "A", "B"], + "subcategory": ["X", "Y", "X", "X", "Y"], + "value": [1, 2, 3, 4, 5], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "category": StringColumnProperty(), + 
"subcategory": StringColumnProperty(), + "value": IntegerColumnProperty(), + }, + uniqueKeys=[["category", "subcategory"]], + ) + + errors = inspect_table(table, table_schema=table_schema) + + uk_errors = [e for e in errors if isinstance(e, RowUniqueKeyError)] + assert len(uk_errors) == 1 + assert uk_errors[0].rowNumber == 4 + assert uk_errors[0].columnNames == ["category", "subcategory"] + + def test_should_handle_both_primary_key_and_unique_keys(self): + table = pl.DataFrame( + { + "id": [1, 2, 3, 2, 5], + "email": [ + "a@test.com", + "b@test.com", + "c@test.com", + "d@test.com", + "a@test.com", + ], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "email": StringColumnProperty(), + }, + primaryKey=["id"], + uniqueKeys=[["email"]], + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 2 + pk_errors = [e for e in errors if isinstance(e, RowPrimaryKeyError)] + uk_errors = [e for e in errors if isinstance(e, RowUniqueKeyError)] + assert len(pk_errors) == 1 + assert pk_errors[0].rowNumber == 4 + assert pk_errors[0].columnNames == ["id"] + assert len(uk_errors) == 1 + assert uk_errors[0].rowNumber == 5 + assert uk_errors[0].columnNames == ["email"] + + def test_should_handle_null_values_in_unique_keys_correctly(self): + table = pl.DataFrame( + { + "id": [1, 2, None, 4, None, 2], + "name": ["Alice", "Bob", "Charlie", "David", "Eve", "Bob"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(type=("integer", "null")), + "name": StringColumnProperty(), + }, + uniqueKeys=[["id"], ["id", "name"]], + ) + + errors = inspect_table(table, table_schema=table_schema) + + uk_errors = [e for e in errors if isinstance(e, RowUniqueKeyError)] + assert len(uk_errors) == 2 + assert uk_errors[0].rowNumber == 6 + assert uk_errors[0].columnNames == ["id"] + assert uk_errors[1].rowNumber == 6 + assert uk_errors[1].columnNames == ["id", "name"] diff --git 
a/table/fairspec_table/actions/table/denormalize.py b/table/fairspec_table/actions/table/denormalize.py new file mode 100644 index 0000000..1eb0ae5 --- /dev/null +++ b/table/fairspec_table/actions/table/denormalize.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from typing import Unpack + +import polars as pl +from fairspec_metadata import get_columns +from fairspec_metadata import TableSchema + +from fairspec_table.actions.column.denormalize import denormalize_column +from fairspec_table.helpers.schema import get_polars_schema +from fairspec_table.models import ( + ColumnMapping, + DenormalizeColumnOptions, + SchemaMapping, + Table, +) + +from .helpers import merge_missing_values + +HEAD_ROWS = 100 + + +def denormalize_table( + table: Table, + table_schema: TableSchema, + **options: Unpack[DenormalizeColumnOptions], +) -> Table: + head: pl.DataFrame = table.head(HEAD_ROWS).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + polars_schema = get_polars_schema(dict(head.schema)) + + mapping = SchemaMapping(source=polars_schema, target=table_schema) + return table.select(*denormalize_columns(mapping, **options).values()) + + +def denormalize_columns( + mapping: SchemaMapping, + **options: Unpack[DenormalizeColumnOptions], +) -> dict[str, pl.Expr]: + exprs: dict[str, pl.Expr] = {} + columns = get_columns(mapping.target.model_dump()) + + for column in columns: + expr = pl.lit(None).alias(column.name) + + polars_column = next( + (pc for pc in mapping.source.columns if pc.name == column.name), + None, + ) + + if polars_column: + merged_column = merge_missing_values(column, mapping.target) + column_mapping = ColumnMapping(source=polars_column, target=merged_column) + expr = denormalize_column(column_mapping, **options) + + exprs[column.name] = expr + + return exprs diff --git a/table/fairspec_table/actions/table/file_dialect.py b/table/fairspec_table/actions/table/file_dialect.py new file mode 100644 index 0000000..dd96f8a 
--- /dev/null +++ b/table/fairspec_table/actions/table/file_dialect.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +from fairspec_table.helpers.file_dialect import get_header_rows +from fairspec_table.settings import NUMBER_COLUMN_NAME + +if TYPE_CHECKING: + from fairspec_table.models import FileDialectWithHeaderAndCommentRows, Table + + +def join_header_rows( + table: Table, + dialect: FileDialectWithHeaderAndCommentRows, +) -> Table: + header_rows = get_header_rows(dialect) + header_offset = header_rows[0] if header_rows else 0 + header_join = dialect.headerJoin if dialect.headerJoin is not None else " " + if len(header_rows) < 2: + return table + + labels = table.collect_schema().names() + + extra_labels_frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .filter(pl.col(NUMBER_COLUMN_NAME).add(header_offset).is_in(header_rows)) + .select(*[pl.col(name).str.join(header_join) for name in labels]) + .collect() + ) + extra_labels = extra_labels_frame.row(0) + + mapping = { + label: header_join.join([label, extra_labels[index]]) + for index, label in enumerate(labels) + } + + return ( + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .filter(pl.col(NUMBER_COLUMN_NAME).add(header_offset).is_in(header_rows).not_()) + .rename(mapping) + .drop(NUMBER_COLUMN_NAME) + ) + + +def skip_comment_rows( + table: Table, + dialect: FileDialectWithHeaderAndCommentRows, +) -> Table: + header_rows = get_header_rows(dialect) + comment_offset = header_rows[-1] if header_rows else 0 + if not dialect.commentRows: + return table + + return ( + table.with_row_index(NUMBER_COLUMN_NAME, 1) + .filter( + pl.col(NUMBER_COLUMN_NAME) + .add(comment_offset) + .is_in(dialect.commentRows) + .not_() + ) + .drop(NUMBER_COLUMN_NAME) + ) diff --git a/table/fairspec_table/actions/table/file_dialect_spec.py 
b/table/fairspec_table/actions/table/file_dialect_spec.py new file mode 100644 index 0000000..1133045 --- /dev/null +++ b/table/fairspec_table/actions/table/file_dialect_spec.py @@ -0,0 +1,256 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import CsvFileDialect + +from .file_dialect import join_header_rows, skip_comment_rows + + +class TestJoinHeaderRows: + def test_should_join_two_header_rows_with_default_space_separator(self): + table = pl.DataFrame( + { + "col1": ["first", "name", "header3", "Alice", "Bob"], + "col2": ["last", "name", "header3", "Smith", "Jones"], + "col3": [ + "contact", + "email", + "header3", + "alice@example.com", + "bob@example.com", + ], + } + ).lazy() + + result = join_header_rows(table, CsvFileDialect(headerRows=[2, 3])) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == ["col1 first", "col2 last", "col3 contact"] + assert collected.height == 4 + assert collected.row(0) == ("name", "name", "email") + assert collected.row(1) == ("header3", "header3", "header3") + assert collected.row(2) == ("Alice", "Smith", "alice@example.com") + assert collected.row(3) == ("Bob", "Jones", "bob@example.com") + + def test_should_join_two_header_rows_with_custom_separator(self): + table = pl.DataFrame( + { + "col1": ["user", "first", "header3", "Alice", "Bob"], + "col2": ["user", "last", "header3", "Smith", "Jones"], + "col3": ["meta", "created", "header3", "2023-01-01", "2023-01-02"], + } + ).lazy() + + result = join_header_rows( + table, CsvFileDialect(headerRows=[2, 3], headerJoin="_") + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == ["col1_user", "col2_user", "col3_meta"] + assert collected.height == 4 + assert collected.row(0) == ("first", "last", "created") + assert collected.row(1) == ("header3", 
"header3", "header3") + assert collected.row(2) == ("Alice", "Smith", "2023-01-01") + assert collected.row(3) == ("Bob", "Jones", "2023-01-02") + + def test_should_return_table_unchanged_when_only_one_header_row(self): + table = pl.DataFrame( + { + "name": ["Alice", "Bob"], + "age": [30, 25], + "city": ["NYC", "LA"], + } + ).lazy() + + result = join_header_rows(table, CsvFileDialect(headerRows=[1])) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == ["name", "age", "city"] + assert collected.height == 2 + + def test_should_return_table_unchanged_when_no_header_rows(self): + table = pl.DataFrame( + { + "field1": ["Alice", "Bob"], + "field2": [30, 25], + "field3": ["NYC", "LA"], + } + ).lazy() + + result = join_header_rows(table, CsvFileDialect()) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == ["field1", "field2", "field3"] + assert collected.height == 2 + + def test_should_join_three_header_rows(self): + table = pl.DataFrame( + { + "col1": ["person", "user", "first", "header4", "Alice", "Bob"], + "col2": ["person", "user", "last", "header4", "Smith", "Jones"], + "col3": ["location", "address", "city", "header4", "NYC", "LA"], + } + ).lazy() + + result = join_header_rows(table, CsvFileDialect(headerRows=[2, 3, 4])) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == [ + "col1 person user", + "col2 person user", + "col3 location address", + ] + assert collected.height == 4 + assert collected.row(0) == ("first", "last", "city") + assert collected.row(1) == ("header4", "header4", "header4") + assert collected.row(2) == ("Alice", "Smith", "NYC") + assert collected.row(3) == ("Bob", "Jones", "LA") + + def test_should_handle_empty_strings_in_header_rows(self): + 
table = pl.DataFrame( + { + "col1": ["person", "", "header3", "Alice", "Bob"], + "col2": ["", "name", "header3", "Smith", "Jones"], + "col3": ["location", "city", "header3", "NYC", "LA"], + } + ).lazy() + + result = join_header_rows(table, CsvFileDialect(headerRows=[2, 3])) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.columns == ["col1 person", "col2 ", "col3 location"] + assert collected.height == 4 + assert collected.row(0) == ("", "name", "city") + assert collected.row(1) == ("header3", "header3", "header3") + assert collected.row(2) == ("Alice", "Smith", "NYC") + assert collected.row(3) == ("Bob", "Jones", "LA") + + +class TestSkipCommentRows: + def test_should_skip_comment_rows_by_row_number(self): + table = pl.DataFrame( + { + "name": ["Alice", "# Comment", "Bob", "Charlie"], + "age": [30, 0, 25, 35], + "city": ["NYC", "ignored", "LA", "SF"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(commentRows=[2], headerRows=False) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 3 + assert collected.row(0) == ("Alice", 30, "NYC") + assert collected.row(1) == ("Bob", 25, "LA") + assert collected.row(2) == ("Charlie", 35, "SF") + + def test_should_skip_multiple_comment_rows(self): + table = pl.DataFrame( + { + "name": ["Alice", "# Comment 1", "Bob", "# Comment 2", "Charlie"], + "age": [30, 0, 25, 0, 35], + "city": ["NYC", "ignored", "LA", "ignored", "SF"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(commentRows=[2, 4], headerRows=False) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 3 + assert collected.row(0) == ("Alice", 30, "NYC") + assert collected.row(1) == ("Bob", 25, "LA") + assert collected.row(2) == 
("Charlie", 35, "SF") + + def test_should_return_table_unchanged_when_no_comment_rows_specified(self): + table = pl.DataFrame( + { + "name": ["Alice", "Bob", "Charlie"], + "age": [30, 25, 35], + "city": ["NYC", "LA", "SF"], + } + ).lazy() + + result = skip_comment_rows(table, CsvFileDialect()) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 3 + assert collected.columns == ["name", "age", "city"] + + def test_should_skip_rows_after_header_when_header_rows_specified(self): + table = pl.DataFrame( + { + "col1": ["name", "Alice", "# Comment", "Bob"], + "col2": ["age", "30", "-1", "25"], + "col3": ["city", "NYC", "ignored", "LA"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(headerRows=[2], commentRows=[5]) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 3 + assert collected.row(0) == ("name", "age", "city") + assert collected.row(1) == ("Alice", "30", "NYC") + assert collected.row(2) == ("Bob", "25", "LA") + + def test_should_handle_comment_rows_at_the_beginning(self): + table = pl.DataFrame( + { + "name": ["# Skip this", "Alice", "Bob"], + "age": [0, 30, 25], + "city": ["ignored", "NYC", "LA"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(commentRows=[1], headerRows=False) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 2 + assert collected.row(0) == ("Alice", 30, "NYC") + assert collected.row(1) == ("Bob", 25, "LA") + + def test_should_handle_comment_rows_at_the_end(self): + table = pl.DataFrame( + { + "name": ["Alice", "Bob", "# Footer comment"], + "age": [30, 25, 0], + "city": ["NYC", "LA", "ignored"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(commentRows=[3], 
headerRows=False) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 2 + assert collected.row(0) == ("Alice", 30, "NYC") + assert collected.row(1) == ("Bob", 25, "LA") + + def test_should_handle_multiple_header_rows_with_comment_rows(self): + table = pl.DataFrame( + { + "col1": ["person", "first", "Alice", "# Comment", "Bob"], + "col2": ["person", "last", "Smith", "ignored", "Jones"], + "col3": ["location", "city", "NYC", "ignored", "LA"], + } + ).lazy() + + result = skip_comment_rows( + table, CsvFileDialect(headerRows=[2, 3], commentRows=[7]) + ) + + collected: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert collected.height == 4 + assert collected.row(0) == ("person", "person", "location") + assert collected.row(1) == ("first", "last", "city") + assert collected.row(2) == ("Alice", "Smith", "NYC") + assert collected.row(3) == ("Bob", "Jones", "LA") diff --git a/table/fairspec_table/actions/table/helpers.py b/table/fairspec_table/actions/table/helpers.py new file mode 100644 index 0000000..5bbc621 --- /dev/null +++ b/table/fairspec_table/actions/table/helpers.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import cast + +from fairspec_metadata import get_base_property_type +from fairspec_metadata import Column +from fairspec_metadata import TableSchema + + +def merge_missing_values(column: Column, table_schema: TableSchema) -> Column: + if not table_schema.missingValues: + return column + + merged_column = column.model_copy(deep=True) + if merged_column.property.missingValues is None: + merged_column.property.missingValues = [] + + property_type = cast("str | list[str] | None", merged_column.property.type) + + for item in table_schema.missingValues: + if get_base_property_type(property_type) == "string": + value = item.value if hasattr(item, "value") else item + if 
not isinstance(value, str): + continue + + merged_column.property.missingValues.append(item) # type: ignore[arg-type] + + return merged_column diff --git a/table/fairspec_table/actions/table/helpers_spec.py b/table/fairspec_table/actions/table/helpers_spec.py new file mode 100644 index 0000000..1c46f62 --- /dev/null +++ b/table/fairspec_table/actions/table/helpers_spec.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from fairspec_metadata import StringColumn, StringColumnProperty +from fairspec_metadata import TableSchema, TableSchemaMissingValueItem + +from .helpers import merge_missing_values + + +class TestMergeMissingValues: + def test_no_missing_values_in_schema(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + table_schema = TableSchema() + + result = merge_missing_values(column, table_schema) + + assert result is column + + def test_merge_string_missing_values(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + table_schema = TableSchema(missingValues=["", "NA", "N/A"]) + + result = merge_missing_values(column, table_schema) + + assert result.property.missingValues == ["", "NA", "N/A"] + assert result is not column + + def test_merge_with_existing_column_missing_values(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["custom"]), + ) + table_schema = TableSchema(missingValues=["", "NA"]) + + result = merge_missing_values(column, table_schema) + + assert result.property.missingValues == ["custom", "", "NA"] + + def test_does_not_mutate_original_column(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(missingValues=["original"]), + ) + table_schema = TableSchema(missingValues=["added"]) + + merge_missing_values(column, table_schema) + + assert column.property.missingValues == ["original"] + + def 
test_skip_integer_missing_values_for_string_columns(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + table_schema = TableSchema(missingValues=["", 99]) + + result = merge_missing_values(column, table_schema) + + assert result.property.missingValues == [""] + + def test_skip_integer_missing_value_item_for_string_columns(self): + column = StringColumn( + name="name", + type="string", + property=StringColumnProperty(), + ) + table_schema = TableSchema( + missingValues=[ + "", + TableSchemaMissingValueItem(value=99, label="missing"), + ] + ) + + result = merge_missing_values(column, table_schema) + + assert result.property.missingValues == [""] diff --git a/table/fairspec_table/actions/table/inspect.py b/table/fairspec_table/actions/table/inspect.py new file mode 100644 index 0000000..4ec1e28 --- /dev/null +++ b/table/fairspec_table/actions/table/inspect.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import json +import math + +import polars as pl +from fairspec_metadata import TableError, get_columns +from fairspec_metadata import ColumnMissingError +from fairspec_metadata import RowError +from fairspec_metadata import TableSchema +from pydantic import TypeAdapter + +from fairspec_table.actions.column.inspect import inspect_column +from fairspec_table.helpers.schema import get_polars_schema +from fairspec_table.models import ColumnMapping, SchemaMapping, Table +from fairspec_table.settings import ERROR_COLUMN_NAME, NUMBER_COLUMN_NAME + +from .checks.key import create_row_key_checks + + +def inspect_table( + table: Table, + *, + table_schema: TableSchema | None = None, + sample_rows: int = 100, + max_errors: int = 1000, +) -> list[TableError]: + errors: list[TableError] = [] + + if table_schema: + sample: pl.DataFrame = table.head(sample_rows).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + polars_schema = get_polars_schema(dict(sample.schema)) + mapping = 
SchemaMapping(source=polars_schema, target=table_schema) + + column_errors = _inspect_columns(mapping, table, max_errors=max_errors) + errors.extend(column_errors) + + row_errors = _inspect_rows(mapping, table, max_errors=max_errors) + errors.extend(row_errors) + + return errors[:max_errors] + + +def _inspect_columns( + mapping: SchemaMapping, + table: Table, + *, + max_errors: int, +) -> list[TableError]: + errors: list[TableError] = [] + columns = get_columns(mapping.target.model_dump()) + max_column_errors = math.ceil(max_errors / len(columns)) if columns else max_errors + + for column in columns: + polars_column = next( + (pc for pc in mapping.source.columns if pc.name == column.name), + None, + ) + + if not polars_column: + is_required = mapping.target.allRequired or ( + mapping.target.required and column.name in mapping.target.required + ) + if is_required: + errors.append( + ColumnMissingError(type="column/missing", columnName=column.name) + ) + continue + + column_mapping = ColumnMapping(source=polars_column, target=column) + field_errors = inspect_column( + column_mapping, table, max_errors=max_column_errors + ) + errors.extend(field_errors) + + if len(errors) >= max_errors: + break + + return errors + + +def _inspect_rows( + mapping: SchemaMapping, + table: Table, + *, + max_errors: int, +) -> list[TableError]: + errors: list[TableError] = [] + columns = get_columns(mapping.target.model_dump()) + max_row_errors = math.ceil(max_errors / len(columns)) if columns else max_errors + + for check in create_row_key_checks(mapping): + row_check_table = table.with_row_index(NUMBER_COLUMN_NAME, 1).with_columns( + pl.when(check.is_error_expr) + .then(pl.lit(check.error_template)) + .otherwise(pl.lit(None)) + .alias(ERROR_COLUMN_NAME) + ) + + row_check_frame: pl.DataFrame = ( # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + row_check_table.filter(pl.col(ERROR_COLUMN_NAME).is_not_null()) + .head(max_row_errors) + .collect() + ) + + 
_row_error_adapter = TypeAdapter(RowError) + for row in row_check_frame.to_dicts(): + error_dict = json.loads(row[ERROR_COLUMN_NAME]) + error_dict["rowNumber"] = row[NUMBER_COLUMN_NAME] + errors.append(_row_error_adapter.validate_python(error_dict)) + + if len(errors) >= max_errors: + break + + return errors diff --git a/table/fairspec_table/actions/table/inspect_spec.py b/table/fairspec_table/actions/table/inspect_spec.py new file mode 100644 index 0000000..efdf486 --- /dev/null +++ b/table/fairspec_table/actions/table/inspect_spec.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import NumberColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import ColumnMissingError +from fairspec_metadata import TableSchema + +from .inspect import inspect_table + + +class TestInspectTable: + def test_should_pass_when_columns_exactly_match(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["John", "Jane"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_not_have_columns_error_when_columns_same_length(self): + table = pl.DataFrame( + { + "id": [1, 2], + "age": [30, 25], + } + ).lazy() + + table_schema = TableSchema( + allRequired=True, + properties={ + "id": NumberColumnProperty(), + "name": NumberColumnProperty(), + }, + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "name" + + def test_should_detect_missing_columns(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + allRequired=True, + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + }, + ) + + errors = inspect_table(table, 
table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "name" + + def test_should_pass_when_column_names_match_regardless_of_order(self): + table = pl.DataFrame( + { + "name": ["John", "Jane"], + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_detect_missing_columns_with_required(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + required=["name"], + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + }, + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "name" + + def test_should_pass_when_non_required_columns_are_missing(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_pass_when_data_contains_all_schema_columns(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["John", "Jane"], + "age": [30, 25], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_pass_when_data_contains_exact_schema_columns(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["John", "Jane"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, 
table_schema=table_schema) + + assert errors == [] + + def test_should_detect_missing_columns_again(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + required=["name"], + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + }, + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "name" + + def test_should_pass_when_schema_contains_all_data_columns(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_pass_when_schema_contains_exact_data_columns(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["John", "Jane"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_pass_when_at_least_one_column_matches(self): + table = pl.DataFrame( + { + "id": [1, 2], + "age": [30, 25], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + } + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert errors == [] + + def test_should_detect_missing_columns_with_all_required(self): + table = pl.DataFrame( + { + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + allRequired=True, + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + }, + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 1 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "name" + + def 
test_should_detect_when_no_columns_match(self): + table = pl.DataFrame( + { + "age": [30, 25], + "email": ["john@example.com", "jane@example.com"], + } + ).lazy() + + table_schema = TableSchema( + allRequired=True, + properties={ + "id": NumberColumnProperty(), + "name": StringColumnProperty(), + }, + ) + + errors = inspect_table(table, table_schema=table_schema) + + assert len(errors) == 2 + assert isinstance(errors[0], ColumnMissingError) + assert errors[0].columnName == "id" + assert isinstance(errors[1], ColumnMissingError) + assert errors[1].columnName == "name" diff --git a/table/fairspec_table/actions/table/normalize.py b/table/fairspec_table/actions/table/normalize.py new file mode 100644 index 0000000..3c0a67c --- /dev/null +++ b/table/fairspec_table/actions/table/normalize.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import get_columns +from fairspec_metadata import TableSchema + +from fairspec_table.actions.column.normalize import normalize_column +from fairspec_table.helpers.schema import get_polars_schema +from fairspec_table.models import ColumnMapping, SchemaMapping, Table + +from .helpers import merge_missing_values + +HEAD_ROWS = 100 + + +def normalize_table(table: Table, table_schema: TableSchema) -> Table: + head: pl.DataFrame = table.head(HEAD_ROWS).collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + polars_schema = get_polars_schema(dict(head.schema)) + + mapping = SchemaMapping(source=polars_schema, target=table_schema) + return table.select(*normalize_columns(mapping).values()) + + +def normalize_columns(mapping: SchemaMapping) -> dict[str, pl.Expr]: + exprs: dict[str, pl.Expr] = {} + columns = get_columns(mapping.target.model_dump()) + + for column in columns: + expr = pl.lit(None).alias(column.name) + + polars_column = next( + (pc for pc in mapping.source.columns if pc.name == column.name), + None, + ) + + if polars_column: + merged_column = 
merge_missing_values(column, mapping.target) + column_mapping = ColumnMapping(source=polars_column, target=merged_column) + expr = normalize_column(column_mapping) + + exprs[column.name] = expr + + return exprs diff --git a/table/fairspec_table/actions/table/normalize_spec.py b/table/fairspec_table/actions/table/normalize_spec.py new file mode 100644 index 0000000..c6925fd --- /dev/null +++ b/table/fairspec_table/actions/table/normalize_spec.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import polars as pl +from fairspec_metadata import BooleanColumnProperty +from fairspec_metadata import IntegerColumnProperty +from fairspec_metadata import StringColumnProperty +from fairspec_metadata import TableSchema + +from .normalize import normalize_table + + +class TestNormalizeTable: + def test_should_work_with_schema(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["english", "中文"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_work_with_less_fields_in_data(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["english", "中文"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + "other": BooleanColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english", "other": None}, + {"id": 2, "name": "中文", "other": None}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def 
test_should_work_with_more_fields_in_data(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["english", "中文"], + "other": [True, False], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_not_work_based_on_fields_order(self): + table = pl.DataFrame( + { + "field1": [1, 2], + "field2": ["english", "中文"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": None, "name": None}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_work_based_on_field_names_equal(self): + table = pl.DataFrame( + { + "name": ["english", "中文"], + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_work_based_on_field_names_subset(self): + table = pl.DataFrame( + { + "name": ["english", "中文"], + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] 
+ + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_work_based_on_field_names_superset(self): + table = pl.DataFrame( + { + "name": ["english", "中文"], + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_work_based_on_field_names_partial(self): + table = pl.DataFrame( + { + "name": ["english", "中文"], + "id": [1, 2], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_parse_string_columns(self): + table = pl.DataFrame( + { + "id": ["1", "2"], + "name": ["english", "中文"], + } + ).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records + + def test_should_read_type_errors_as_nulls(self): + table = pl.DataFrame( + { + "id": [1, 2], + "name": ["english", "中文"], + } + 
).lazy() + + table_schema = TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": IntegerColumnProperty(), + } + ) + + records = [ + {"id": 1, "name": None}, + {"id": 2, "name": None}, + ] + + result = normalize_table(table, table_schema) + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == records diff --git a/table/fairspec_table/actions/table/query.py b/table/fairspec_table/actions/table/query.py new file mode 100644 index 0000000..23dd5e2 --- /dev/null +++ b/table/fairspec_table/actions/table/query.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_table.models import Table + + +def query_table(table: Table, query: str) -> Table: + context = pl.SQLContext({"self": table}) + return context.execute(query) diff --git a/table/fairspec_table/actions/table/query_spec.py b/table/fairspec_table/actions/table/query_spec.py new file mode 100644 index 0000000..2f0abb6 --- /dev/null +++ b/table/fairspec_table/actions/table/query_spec.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import polars as pl + +from .query import query_table + + +class TestQueryTable: + def test_select_all(self): + table = pl.DataFrame({"name": ["Alice", "Bob"], "age": [30, 25]}).lazy() + + result = query_table(table, "SELECT * FROM self") + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25}, + ] + + def test_select_with_filter(self): + table = pl.DataFrame({"name": ["Alice", "Bob"], "age": [30, 25]}).lazy() + + result = query_table(table, "SELECT name FROM self WHERE age > 26") + + frame: pl.DataFrame = result.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + assert frame.to_dicts() == [{"name": "Alice"}] + + def 
test_returns_lazy_frame(self): + table = pl.DataFrame({"x": [1]}).lazy() + + result = query_table(table, "SELECT * FROM self") + + assert isinstance(result, pl.LazyFrame) diff --git a/table/fairspec_table/actions/table_schema/__init__.py b/table/fairspec_table/actions/table_schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/actions/table_schema/infer.py b/table/fairspec_table/actions/table_schema/infer.py new file mode 100644 index 0000000..2d83210 --- /dev/null +++ b/table/fairspec_table/actions/table_schema/infer.py @@ -0,0 +1,564 @@ +from __future__ import annotations + +import math +import re +from typing import TYPE_CHECKING, Unpack + +import polars as pl +from fairspec_metadata import create_column_from_property, get_column_properties +from fairspec_metadata import TableSchema + +from fairspec_table.helpers.schema import get_polars_schema +from fairspec_table.models.schema import InferTableSchemaOptions + +if TYPE_CHECKING: + from fairspec_metadata import Column + from fairspec_metadata import Descriptor + + from fairspec_table.models.table import Table + +DEFAULT_MISSING_VALUES = ["", "NA", "N/A", "null", "-"] + +TYPE_MAPPING: dict[type[pl.DataType], str] = { + pl.Boolean: "boolean", + pl.Categorical: "string", + pl.Date: "date", + pl.Datetime: "date-time", + pl.Decimal: "number", + pl.Float32: "number", + pl.Float64: "number", + pl.Int8: "integer", + pl.Int16: "integer", + pl.Int32: "integer", + pl.Int64: "integer", + pl.UInt8: "integer", + pl.UInt16: "integer", + pl.UInt32: "integer", + pl.UInt64: "integer", + pl.List: "array", + pl.Null: "unknown", + pl.String: "string", + pl.Struct: "object", + pl.Time: "time", +} + + +def infer_table_schema_from_table( + table: Table, + **options: Unpack[InferTableSchemaOptions], +) -> TableSchema: + sample_rows = options.get("sampleRows") or 100 + sample: pl.DataFrame = table.head(sample_rows).collect() # ty: ignore[invalid-assignment] 
https://github.com/astral-sh/ty/issues/2278 + return infer_table_schema_from_sample(sample, **options) + + +def infer_table_schema_from_sample( + sample: pl.DataFrame, + **options: Unpack[InferTableSchemaOptions], +) -> TableSchema: + confidence = options.get("confidence") or 0.9 + column_types = options.get("columnTypes") + keep_strings = options.get("keepStrings") + effective_missing_values = ( + options["missingValues"] + if options.get("missingValues") is not None + else DEFAULT_MISSING_VALUES + ) + detected_missing_values: set[str] = set() + + regex_mapping = _create_regex_mapping(**options) + + polars_schema = get_polars_schema(sample.schema) + column_names = options.get("columnNames") or [ + c.name for c in polars_schema.columns + ] + + failure_threshold = (sample.height - math.floor(sample.height * confidence)) or 1 + + columns: list[Column] = [] + for name in column_names: + polars_column = next((c for c in polars_schema.columns if c.name == name), None) + if not polars_column: + raise ValueError(f'Column "{name}" not found in the table') + + col_type = ( + (column_types.get(name) if column_types else None) + or TYPE_MAPPING.get(polars_column.type) + or "unknown" + ) + is_nullable = False + + property_dict: Descriptor = _create_property_dict(col_type) + effective_col_type = col_type + + if not (column_types and name in column_types): + if col_type == "array": + if options.get("arrayType") == "list": + effective_col_type = "list" + property_dict = {"type": "string", "format": "list"} + else: + effective_col_type = "array" + property_dict = {"type": "array"} + + if col_type == "string": + has_polars_nulls = sample.filter(pl.col(name).is_null()).height > 0 + missing_filter: pl.Expr = pl.col(name).is_null() + has_missing_values = False + for mv in effective_missing_values: + if sample.filter(pl.col(name) == pl.lit(mv)).height > 0: + detected_missing_values.add(mv) + has_missing_values = True + missing_filter = missing_filter | (pl.col(name) == pl.lit(mv)) + 
is_nullable = has_polars_nulls or has_missing_values + + if not keep_strings: + effective_sample = sample.filter(missing_filter.not_()) + if effective_sample.height > 0: + effective_failure_threshold = ( + effective_sample.height + - math.floor(effective_sample.height * confidence) + ) or 1 + for regex, nameless_column in regex_mapping.items(): + failures = ( + effective_sample.filter( + pl.col(name).str.contains(regex).not_() + ) + .head(effective_failure_threshold) + .height + ) + + if failures < effective_failure_threshold: + effective_col_type = nameless_column["type"] + property_dict = dict(nameless_column["property"]) + break + + if col_type == "number": + failures = ( + sample.filter((pl.col(name) == pl.col(name).round(0)).not_()) + .head(failure_threshold) + .height + ) + + if failures < failure_threshold: + effective_col_type = "integer" + property_dict = {"type": "integer"} + + if col_type != "string": + if sample.filter(pl.col(name).is_null()).height > 0: + is_nullable = True + + column = _build_column(name, effective_col_type, property_dict) + + if is_nullable: + _make_property_nullable(column) + _enhance_column(column, **options) + columns.append(column) + + table_schema = TableSchema(properties=get_column_properties(columns)) + + if options.get("missingValues") is None and len( + detected_missing_values + ) > 0: + table_schema.missingValues = list(detected_missing_values) + + _enhance_schema(table_schema, **options) + return table_schema + + +def _create_property_dict(col_type: str) -> Descriptor: + match col_type: + case "boolean": + return {"type": "boolean"} + case "integer": + return {"type": "integer"} + case "number": + return {"type": "number"} + case "string": + return {"type": "string"} + case "date": + return {"type": "string", "format": "date"} + case "date-time": + return {"type": "string", "format": "date-time"} + case "time": + return {"type": "string", "format": "time"} + case "array": + return {"type": "array"} + case "object": + 
return {"type": "object"} + case _: + return {} + + +def _build_column(name: str, col_type: str, property_dict: Descriptor) -> Column: + return create_column_from_property(name, property_dict) + + +def _escape_regex(value: str) -> str: + return re.escape(value) + + +def _derive_month_first(format: str | None) -> bool | None: + if not format: + return None + m_index = format.find("%m") + d_index = format.find("%d") + if m_index == -1 or d_index == -1: + return None + return m_index < d_index + + +def _create_regex_mapping( + **options: Unpack[InferTableSchemaOptions], +) -> dict[str, Descriptor]: + comma_decimal = options.get("commaDecimal") + month_first = options.get("monthFirst") + true_values = options.get("trueValues") + false_values = options.get("falseValues") + decimal_char = options.get("decimalChar") + group_char = options.get("groupChar") + list_delimiter = options.get("listDelimiter") + date_format = options.get("dateFormat") + time_format = options.get("timeFormat") + datetime_format = options.get("datetimeFormat") + + effective_comma_decimal = ( + comma_decimal + if comma_decimal is not None + else (decimal_char == "," or group_char == ".") + ) + + effective_month_first = ( + month_first + if month_first is not None + else ( + _derive_month_first(date_format) + if _derive_month_first(date_format) is not None + else _derive_month_first(datetime_format) + ) + ) + + all_bool_values = [ + *(true_values or ["true", "True", "TRUE"]), + *(false_values or ["false", "False", "FALSE"]), + ] + bool_regex = f"^({'|'.join(_escape_regex(v) for v in all_bool_values)})$" + + list_esc = _escape_regex(list_delimiter or ",") + + mapping: dict[str, Descriptor] = { + "^\\d+$": {"type": "integer", "property": {"type": "integer"}}, + "^\\d{1,3}(,\\d{3})+$": ( + {"type": "number", "property": {"type": "number"}} + if effective_comma_decimal + else {"type": "integer", "property": {"type": "integer", "groupChar": ","}} + ), + "^\\d+\\.\\d+$": ( + {"type": "integer", "property": 
{"type": "integer", "groupChar": "."}} + if effective_comma_decimal + else {"type": "number", "property": {"type": "number"}} + ), + "^\\d{1,3}(,\\d{3})+\\.\\d+$": { + "type": "number", + "property": {"type": "number", "groupChar": ","}, + }, + "^\\d{1,3}(\\.\\d{3})+,\\d+$": { + "type": "number", + "property": {"type": "number", "groupChar": ".", "decimalChar": ","}, + }, + "^[\\p{Sc}\\s-]*\\d+[%\\p{Sc}\\s]*$": { + "type": "integer", + "property": {"type": "integer", "withText": True}, + }, + "^[\\p{Sc}\\s-]*\\d{1,3}(,\\d{3})+[%\\p{Sc}\\s]*$": ( + {"type": "number", "property": {"type": "number", "withText": True}} + if effective_comma_decimal + else { + "type": "integer", + "property": {"type": "integer", "groupChar": ",", "withText": True}, + } + ), + "^[\\p{Sc}\\s-]*\\d+\\.\\d+[%\\p{Sc}\\s]*$": ( + { + "type": "integer", + "property": {"type": "integer", "groupChar": ".", "withText": True}, + } + if effective_comma_decimal + else {"type": "number", "property": {"type": "number", "withText": True}} + ), + "^[\\p{Sc}\\s-]*\\d{1,3}(,\\d{3})+\\.\\d+[%\\p{Sc}\\s]*$": { + "type": "number", + "property": {"type": "number", "groupChar": ",", "withText": True}, + }, + "^[\\p{Sc}\\s-]*\\d{1,3}(\\.\\d{3})+,\\d+[%\\p{Sc}\\s]*$": { + "type": "number", + "property": { + "type": "number", + "groupChar": ".", + "decimalChar": ",", + "withText": True, + }, + }, + bool_regex: { + "type": "boolean", + "property": {"type": "boolean"}, + }, + "^\\d{4}-\\d{2}-\\d{2}$": { + "type": "date", + "property": {"type": "string", "format": "date"}, + }, + "^\\d{4}/\\d{2}/\\d{2}$": { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%Y/%m/%d", + }, + }, + "^\\d{2}/\\d{2}/\\d{4}$": ( + { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%m/%d/%Y", + }, + } + if effective_month_first + else { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%d/%m/%Y", + }, + } + 
), + "^\\d{2}-\\d{2}-\\d{4}$": ( + { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%m-%d-%Y", + }, + } + if effective_month_first + else { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%d-%m-%Y", + }, + } + ), + "^\\d{2}\\.\\d{2}\\.\\d{4}$": ( + { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%m.%d.%Y", + }, + } + if effective_month_first + else { + "type": "date", + "property": { + "type": "string", + "format": "date", + "temporalFormat": "%d.%m.%Y", + }, + } + ), + "^\\d{2}:\\d{2}:\\d{2}$": { + "type": "time", + "property": {"type": "string", "format": "time"}, + }, + "^\\d{2}:\\d{2}$": { + "type": "time", + "property": {"type": "string", "format": "time", "temporalFormat": "%H:%M"}, + }, + "^\\d{1,2}:\\d{2}:\\d{2}\\s*(am|pm|AM|PM)$": { + "type": "time", + "property": { + "type": "string", + "format": "time", + "temporalFormat": "%I:%M:%S %p", + }, + }, + "^\\d{1,2}:\\d{2}\\s*(am|pm|AM|PM)$": { + "type": "time", + "property": { + "type": "string", + "format": "time", + "temporalFormat": "%I:%M %p", + }, + }, + "^\\d{2}:\\d{2}:\\d{2}[+-]\\d{2}:?\\d{2}$": { + "type": "time", + "property": {"type": "string", "format": "time"}, + }, + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z?$": { + "type": "date-time", + "property": {"type": "string", "format": "date-time"}, + }, + "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}[+-]\\d{2}:?\\d{2}$": { + "type": "date-time", + "property": {"type": "string", "format": "date-time"}, + }, + "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$": { + "type": "date-time", + "property": { + "type": "string", + "format": "date-time", + "temporalFormat": "%Y-%m-%d %H:%M:%S", + }, + }, + "^\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}$": ( + { + "type": "date-time", + "property": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M", + }, + } + if effective_month_first + else { + "type": 
"date-time", + "property": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M", + }, + } + ), + "^\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}:\\d{2}$": ( + { + "type": "date-time", + "property": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M:%S", + }, + } + if effective_month_first + else { + "type": "date-time", + "property": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M:%S", + }, + } + ), + "^\\{": {"type": "object", "property": {"type": "object"}}, + "^\\[": {"type": "array", "property": {"type": "array"}}, + f"^\\d+{list_esc}\\d+$": { + "type": "list", + "property": {"type": "string", "format": "list", "itemType": "integer"}, + }, + f"^[\\d.]+{list_esc}[\\d.]+$": { + "type": "list", + "property": {"type": "string", "format": "list", "itemType": "number"}, + }, + "^https?://\\S+$": { + "type": "url", + "property": {"type": "string", "format": "url"}, + }, + "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$": { + "type": "email", + "property": {"type": "string", "format": "email"}, + }, + "^(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\\s*\\(": { + "type": "wkt", + "property": {"type": "string", "format": "wkt"}, + }, + "^P(\\d+Y)?(\\d+M)?(\\d+W)?(\\d+D)?(T(\\d+H)?(\\d+M)?(\\d+(\\.\\d+)?S)?)?$": { + "type": "duration", + "property": {"type": "string", "format": "duration"}, + }, + "^([0-9a-fA-F]{2}){8,}$": { + "type": "hex", + "property": {"type": "string", "format": "hex"}, + }, + } + + canonical_formats: dict[str, str] = { + "date": "%Y-%m-%d", + "time": "%H:%M:%S", + "date-time": "%Y-%m-%dT%H:%M:%S", + } + + to_delete: list[str] = [] + for regex, col in mapping.items(): + user_format = ( + date_format + if col["type"] == "date" + else ( + time_format + if col["type"] == "time" + else (datetime_format if col["type"] == "date-time" else None) + ) + ) + if user_format is None: + continue + entry_format = ( + 
col["property"].get("temporalFormat") + if "temporalFormat" in col.get("property", {}) + else canonical_formats.get(col["type"]) + ) + if entry_format != user_format: + to_delete.append(regex) + + for regex in to_delete: + del mapping[regex] + + return mapping + + +def _enhance_column(column: Column, **options: Unpack[InferTableSchemaOptions]) -> None: + if not options: + return + if column.type == "boolean": + if options.get("trueValues") is not None: + column.property.trueValues = options["trueValues"] # type: ignore[union-attr] + if options.get("falseValues") is not None: + column.property.falseValues = options["falseValues"] # type: ignore[union-attr] + elif column.type == "integer": + if options.get("groupChar") is not None: + column.property.groupChar = options["groupChar"] # type: ignore[union-attr] + elif column.type == "number": + if options.get("decimalChar") is not None: + column.property.decimalChar = options["decimalChar"] # type: ignore[union-attr] + if options.get("groupChar") is not None: + column.property.groupChar = options["groupChar"] # type: ignore[union-attr] + elif column.type == "date-time": + if options.get("datetimeFormat") is not None: + column.property.temporalFormat = options["datetimeFormat"] # type: ignore[union-attr] + elif column.type == "date": + if options.get("dateFormat") is not None: + column.property.temporalFormat = options["dateFormat"] # type: ignore[union-attr] + elif column.type == "time": + if options.get("timeFormat") is not None: + column.property.temporalFormat = options["timeFormat"] # type: ignore[union-attr] + elif column.type == "list": + if options.get("listDelimiter") is not None: + column.property.delimiter = options["listDelimiter"] # type: ignore[union-attr] + if options.get("listItemType") is not None: + column.property.itemType = options["listItemType"] # type: ignore[union-attr] + + +def _make_property_nullable(column: Column) -> None: + base_type = column.property.type + if base_type and 
isinstance(base_type, str): + column.property.type = (base_type, "null") # type: ignore[assignment] + + +def _enhance_schema( + table_schema: TableSchema, + **options: Unpack[InferTableSchemaOptions], +) -> None: + if options.get("missingValues") is not None: + table_schema.missingValues = list(options["missingValues"]) diff --git a/table/fairspec_table/actions/table_schema/infer_spec.py b/table/fairspec_table/actions/table_schema/infer_spec.py new file mode 100644 index 0000000..bbca9ba --- /dev/null +++ b/table/fairspec_table/actions/table_schema/infer_spec.py @@ -0,0 +1,736 @@ +from __future__ import annotations + +import polars as pl + +from .infer import infer_table_schema_from_table + + +class TestInferTableSchemaFromTable: + def test_should_infer_from_native_types(self): + table = pl.DataFrame( + { + "integer": pl.Series("integer", [1, 2], pl.Int32), + "number": [1.1, 2.2], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "integer": {"type": "integer"}, + "number": {"type": "number"}, + }, + } + + def test_should_infer_integers_from_floats(self): + table = pl.DataFrame( + { + "id": [1.0, 2.0, 3.0], + "count": [10.0, 20.0, 30.0], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "id": {"type": "integer"}, + "count": {"type": "integer"}, + }, + } + + def test_should_infer_numeric(self): + table = pl.DataFrame( + { + "name1": ["1", "2", "3"], + "name2": ["1,000", "2,000", "3,000"], + "name3": ["1.1", "2.2", "3.3"], + "name4": ["1,000.1", "2,000.2", "3,000.3"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "integer"}, + "name2": {"type": "integer", "groupChar": ","}, + "name3": {"type": "number"}, + "name4": {"type": "number", "groupChar": 
","}, + }, + } + + def test_should_infer_numeric_comma_decimal(self): + table = pl.DataFrame( + { + "name1": ["1.000", "2.000", "3.000"], + "name2": ["1.000,5", "2.000,5", "3.000,5"], + } + ).lazy() + + result = infer_table_schema_from_table(table, commaDecimal=True) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "integer", "groupChar": "."}, + "name2": {"type": "number", "decimalChar": ",", "groupChar": "."}, + }, + } + + def test_should_infer_numeric_with_text(self): + table = pl.DataFrame( + { + "integer": ["$10", "$20", "$30"], + "percent": ["10%", "20%", "30%"], + "number": ["$10.50", "$20.75", "$30.99"], + "percentNumber": ["10.5%", "20.75%", "30.99%"], + "integerGroupChar": ["$1,000", "$2,000", "$3,000"], + "numberGroupChar": ["$1,000.50", "$2,000.75", "$3,000.99"], + "european": ["\u20ac1.000,50", "\u20ac2.000,75", "\u20ac3.000,99"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "integer": {"type": "integer", "withText": True}, + "percent": {"type": "integer", "withText": True}, + "number": {"type": "number", "withText": True}, + "percentNumber": {"type": "number", "withText": True}, + "integerGroupChar": { + "type": "integer", + "groupChar": ",", + "withText": True, + }, + "numberGroupChar": { + "type": "number", + "groupChar": ",", + "withText": True, + }, + "european": { + "type": "number", + "groupChar": ".", + "decimalChar": ",", + "withText": True, + }, + }, + } + + def test_should_not_infer_numeric_with_text_for_non_currency_text(self): + table = pl.DataFrame( + { + "ordinal": ["1st", "2nd", "3rd"], + "unit": ["2d", "5h", "10m"], + "label": ["Level 5", "Level 10", "Level 15"], + "hash": ["#10", "#20", "#30"], + "mixed": ["5x", "10x", "15x"], + "word": ["abc", "def", "ghi"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, 
exclude_none=True) == { + "properties": { + "ordinal": {"type": "string"}, + "unit": {"type": "string"}, + "label": {"type": "string"}, + "hash": {"type": "string"}, + "mixed": {"type": "string"}, + "word": {"type": "string"}, + }, + } + + def test_should_infer_booleans(self): + table = pl.DataFrame( + { + "name1": ["true", "True", "TRUE"], + "name2": ["false", "False", "FALSE"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "boolean"}, + "name2": {"type": "boolean"}, + }, + } + + def test_should_infer_objects(self): + table = pl.DataFrame( + { + "name1": ['{"a": 1}'], + "name2": ["{}"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "object"}, + "name2": {"type": "object"}, + }, + } + + def test_should_infer_arrays(self): + table = pl.DataFrame( + { + "name1": ["[1,2,3]"], + "name2": ["[]"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "array"}, + "name2": {"type": "array"}, + }, + } + + def test_should_infer_dates_with_iso_format(self): + table = pl.DataFrame( + { + "name1": ["2023-01-15", "2023-02-20", "2023-03-25"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name1": {"type": "string", "format": "date"}, + }, + } + + def test_should_infer_dates_with_slash_format(self): + table = pl.DataFrame( + { + "yearFirst": ["2023/01/15", "2023/02/20", "2023/03/25"], + "dayMonth": ["15/01/2023", "20/02/2023", "25/03/2023"], + "monthDay": ["01/15/2023", "02/20/2023", "03/25/2023"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) 
== { + "properties": { + "yearFirst": { + "type": "string", + "format": "date", + "temporalFormat": "%Y/%m/%d", + }, + "dayMonth": { + "type": "string", + "format": "date", + "temporalFormat": "%d/%m/%Y", + }, + "monthDay": { + "type": "string", + "format": "date", + "temporalFormat": "%d/%m/%Y", + }, + }, + } + + month_first_result = infer_table_schema_from_table( + table, monthFirst=True + ) + assert month_first_result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "yearFirst": { + "type": "string", + "format": "date", + "temporalFormat": "%Y/%m/%d", + }, + "dayMonth": { + "type": "string", + "format": "date", + "temporalFormat": "%m/%d/%Y", + }, + "monthDay": { + "type": "string", + "format": "date", + "temporalFormat": "%m/%d/%Y", + }, + }, + } + + def test_should_infer_dates_with_hyphen_format(self): + table = pl.DataFrame( + { + "dayMonth": ["15-01-2023", "20-02-2023", "25-03-2023"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "dayMonth": { + "type": "string", + "format": "date", + "temporalFormat": "%d-%m-%Y", + }, + }, + } + + month_first_result = infer_table_schema_from_table( + table, monthFirst=True + ) + assert month_first_result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "dayMonth": { + "type": "string", + "format": "date", + "temporalFormat": "%m-%d-%Y", + }, + }, + } + + def test_should_infer_times_with_standard_format(self): + table = pl.DataFrame( + { + "fullTime": ["14:30:45", "08:15:30", "23:59:59"], + "shortTime": ["14:30", "08:15", "23:59"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "fullTime": {"type": "string", "format": "time"}, + "shortTime": { + "type": "string", + "format": "time", + "temporalFormat": "%H:%M", + }, + }, + } + + def 
test_should_infer_times_with_12_hour_format(self): + table = pl.DataFrame( + { + "fullTime": ["2:30:45 PM", "8:15:30 AM", "11:59:59 PM"], + "shortTime": ["2:30 PM", "8:15 AM", "11:59 PM"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "fullTime": { + "type": "string", + "format": "time", + "temporalFormat": "%I:%M:%S %p", + }, + "shortTime": { + "type": "string", + "format": "time", + "temporalFormat": "%I:%M %p", + }, + }, + } + + def test_should_infer_times_with_timezone_offset(self): + table = pl.DataFrame( + { + "name": ["14:30:45+01:00", "08:15:30-05:00", "23:59:59+00:00"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "name": {"type": "string", "format": "time"}, + }, + } + + def test_should_infer_datetimes_with_iso_format(self): + table = pl.DataFrame( + { + "standard": [ + "2023-01-15T14:30:45", + "2023-02-20T08:15:30", + "2023-03-25T23:59:59", + ], + "utc": [ + "2023-01-15T14:30:45Z", + "2023-02-20T08:15:30Z", + "2023-03-25T23:59:59Z", + ], + "withTz": [ + "2023-01-15T14:30:45+01:00", + "2023-02-20T08:15:30-05:00", + "2023-03-25T23:59:59+00:00", + ], + "withSpace": [ + "2023-01-15 14:30:45", + "2023-02-20 08:15:30", + "2023-03-25 23:59:59", + ], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "standard": {"type": "string", "format": "date-time"}, + "utc": {"type": "string", "format": "date-time"}, + "withTz": {"type": "string", "format": "date-time"}, + "withSpace": { + "type": "string", + "format": "date-time", + "temporalFormat": "%Y-%m-%d %H:%M:%S", + }, + }, + } + + def test_should_infer_datetimes_with_custom_formats(self): + table = pl.DataFrame( + { + "shortDayMonth": [ + "15/01/2023 14:30", + "20/02/2023 08:15", + "25/03/2023 23:59", + ], + 
"fullDayMonth": [ + "15/01/2023 14:30:45", + "20/02/2023 08:15:30", + "25/03/2023 23:59:59", + ], + "shortMonthDay": [ + "01/15/2023 14:30", + "02/20/2023 08:15", + "03/25/2023 23:59", + ], + "fullMonthDay": [ + "01/15/2023 14:30:45", + "02/20/2023 08:15:30", + "03/25/2023 23:59:59", + ], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "shortDayMonth": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M", + }, + "fullDayMonth": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M:%S", + }, + "shortMonthDay": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M", + }, + "fullMonthDay": { + "type": "string", + "format": "date-time", + "temporalFormat": "%d/%m/%Y %H:%M:%S", + }, + }, + } + + month_first_result = infer_table_schema_from_table( + table, monthFirst=True + ) + assert month_first_result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "shortDayMonth": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M", + }, + "fullDayMonth": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M:%S", + }, + "shortMonthDay": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M", + }, + "fullMonthDay": { + "type": "string", + "format": "date-time", + "temporalFormat": "%m/%d/%Y %H:%M:%S", + }, + }, + } + + def test_should_infer_urls(self): + table = pl.DataFrame( + { + "url": ["https://example.com", "http://foo.bar/baz"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "url": {"type": "string", "format": "url"}, + }, + } + + def test_should_infer_emails(self): + table = pl.DataFrame( + { + "email": ["user@example.com", "test.name+tag@domain.org"], + } + ).lazy() + + result 
= infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "email": {"type": "string", "format": "email"}, + }, + } + + def test_should_infer_wkt(self): + table = pl.DataFrame( + { + "geom": ["POINT(1 2)", "LINESTRING(0 0, 1 1)"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "geom": {"type": "string", "format": "wkt"}, + }, + } + + def test_should_infer_durations(self): + table = pl.DataFrame( + { + "duration": ["P1Y2M3D", "PT1H30M", "P1D"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "duration": {"type": "string", "format": "duration"}, + }, + } + + def test_should_infer_hex(self): + table = pl.DataFrame( + { + "hex": ["1a2b3c4d5e6f7890", "abcdef0123456789"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "hex": {"type": "string", "format": "hex"}, + }, + } + + def test_should_not_infer_url_email_wkt_hex_for_similar_text(self): + table = pl.DataFrame( + { + "notUrl": ["ftp://example.com", "ftp://other.com"], + "notEmail": ["user@", "test@"], + "notWkt": ["POINT", "LINESTRING"], + "notHex": ["cafe1234", "deadbeef"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "notUrl": {"type": "string"}, + "notEmail": {"type": "string"}, + "notWkt": {"type": "string"}, + "notHex": {"type": "string"}, + }, + } + + def test_should_infer_lists(self): + table = pl.DataFrame( + { + "numericList": ["1.5,2.3", "4.1,5.9", "7.2,8.6"], + "integerList": ["1,2", "3,4", "5,6"], + "singleValue": ["1.5", "2.3", "4.1"], + } + ).lazy() + + result = infer_table_schema_from_table(table) + dumped = 
result.model_dump(by_alias=True, exclude_none=True) + assert dumped == { + "properties": { + "numericList": { + "type": "string", + "format": "list", + "itemType": "number", + }, + "integerList": { + "type": "string", + "format": "list", + "itemType": "integer", + }, + "singleValue": {"type": "number"}, + }, + } + + +class TestInferTableSchemaFromTableNullable: + def test_should_infer_nullable_string_from_missing_values(self): + table = pl.DataFrame({"name": ["Alice", "Bob", "NA"]}).lazy() + result = infer_table_schema_from_table(table) + dumped = result.model_dump(by_alias=True, exclude_none=True) + assert dumped["properties"] == {"name": {"type": ("string", "null")}} + assert dumped["missingValues"] == ["NA"] + + def test_should_infer_nullable_integer_from_polars_nulls(self): + table = pl.DataFrame( + {"value": pl.Series("value", [1, 2, None], pl.Int32)} + ).lazy() + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": {"value": {"type": ("integer", "null")}}, + } + + def test_should_infer_nullable_number_from_polars_nulls(self): + table = pl.DataFrame( + {"value": pl.Series("value", [1.1, None, 3.3], pl.Float64)} + ).lazy() + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": {"value": {"type": ("number", "null")}}, + } + + def test_should_infer_nullable_url_with_missing_values(self): + table = pl.DataFrame({"link": ["https://a.com", "http://b.com", "NA"]}).lazy() + result = infer_table_schema_from_table(table) + dumped = result.model_dump(by_alias=True, exclude_none=True) + assert dumped["properties"] == { + "link": {"type": ("string", "null"), "format": "url"} + } + assert dumped["missingValues"] == ["NA"] + + def test_should_infer_nullable_string_when_all_values_are_missing(self): + table = pl.DataFrame({"empty": ["NA", "N/A", ""]}).lazy() + result = infer_table_schema_from_table(table) + dumped = 
result.model_dump(by_alias=True, exclude_none=True) + assert dumped["properties"]["empty"] == {"type": ("string", "null")} + assert result.missingValues is not None + assert set(result.missingValues) == {"NA", "N/A", ""} + assert len(result.missingValues) == 3 + + def test_should_use_explicit_missing_values_option(self): + table = pl.DataFrame({"name": ["Alice", "MISSING"]}).lazy() + result = infer_table_schema_from_table( + table, missingValues=["MISSING"] + ) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": {"name": {"type": ("string", "null")}}, + "missingValues": ["MISSING"], + } + + def test_should_not_make_columns_nullable_when_no_nulls_exist(self): + table = pl.DataFrame({"name": ["Alice", "Bob"]}).lazy() + result = infer_table_schema_from_table(table) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": {"name": {"type": "string"}}, + } + + def test_should_infer_nullable_integer_from_empty_string(self): + table = pl.DataFrame({"value": ["1", "2", ""]}).lazy() + result = infer_table_schema_from_table(table) + dumped = result.model_dump(by_alias=True, exclude_none=True) + assert dumped["properties"] == {"value": {"type": ("integer", "null")}} + assert dumped["missingValues"] == [""] + + +class TestInferTableSchemaFromTableOptionsSteerDetection: + def test_should_steer_boolean_detection_from_true_values_false_values(self): + table = pl.DataFrame({"value": ["yes", "no", "yes"]}).lazy() + result = infer_table_schema_from_table( + table, + trueValues=["yes"], + falseValues=["no"], + ) + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": { + "type": "boolean", + "trueValues": ["yes"], + "falseValues": ["no"], + }, + }, + } + + def test_should_steer_number_detection_from_group_char(self): + table = pl.DataFrame({"value": ["1.000", "2.000", "3.000"]}).lazy() + result = infer_table_schema_from_table(table, groupChar=".") + assert result.model_dump(by_alias=True, 
exclude_none=True) == { + "properties": {"value": {"type": "integer", "groupChar": "."}}, + } + + def test_should_steer_number_detection_from_decimal_char(self): + table = pl.DataFrame({"value": ["1.000,5", "2.000,5"]}).lazy() + result = infer_table_schema_from_table(table, decimalChar=",") + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": {"type": "number", "decimalChar": ",", "groupChar": "."}, + }, + } + + def test_should_steer_list_detection_from_list_delimiter(self): + table = pl.DataFrame({"value": ["1;2", "3;4", "5;6"]}).lazy() + result = infer_table_schema_from_table(table, listDelimiter=";") + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": { + "type": "string", + "format": "list", + "itemType": "integer", + "delimiter": ";", + }, + }, + } + + def test_should_steer_date_detection_from_date_format(self): + table = pl.DataFrame({"value": ["15/01/2023", "20/02/2023"]}).lazy() + result = infer_table_schema_from_table(table, dateFormat="%d/%m/%Y") + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": { + "type": "string", + "format": "date", + "temporalFormat": "%d/%m/%Y", + }, + }, + } + + def test_should_derive_month_first_from_date_format(self): + table = pl.DataFrame({"value": ["01/15/2023", "02/20/2023"]}).lazy() + result = infer_table_schema_from_table(table, dateFormat="%m/%d/%Y") + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": { + "type": "string", + "format": "date", + "temporalFormat": "%m/%d/%Y", + }, + }, + } + + def test_should_filter_time_patterns_from_time_format(self): + table = pl.DataFrame({"value": ["14:30", "08:15"]}).lazy() + result = infer_table_schema_from_table(table, timeFormat="%H:%M") + assert result.model_dump(by_alias=True, exclude_none=True) == { + "properties": { + "value": { + "type": "string", + "format": "time", + "temporalFormat": "%H:%M", + }, + }, + 
} diff --git a/table/fairspec_table/helpers/__init__.py b/table/fairspec_table/helpers/__init__.py new file mode 100644 index 0000000..3613812 --- /dev/null +++ b/table/fairspec_table/helpers/__init__.py @@ -0,0 +1,13 @@ +from .column import get_categorical_values_and_labels +from .file_dialect import get_header_rows +from .general import get_is_object +from .schema import get_polars_schema +from .table import evaluate_expression + +__all__ = [ + "evaluate_expression", + "get_categorical_values_and_labels", + "get_header_rows", + "get_is_object", + "get_polars_schema", +] diff --git a/table/fairspec_table/helpers/column.py b/table/fairspec_table/helpers/column.py new file mode 100644 index 0000000..09711ab --- /dev/null +++ b/table/fairspec_table/helpers/column.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from fairspec_metadata import ( + CategoricalColumn, + IntegerCategoryItem, + StringCategoryItem, +) + + +def get_categorical_values_and_labels( + column: CategoricalColumn, +) -> tuple[list[str | int], list[str]]: + values: list[str | int] = [] + labels: list[str] = [] + + for item in column.property.categories or []: + if isinstance(item, (IntegerCategoryItem, StringCategoryItem)): + values.append(item.value) + labels.append(item.label) + else: + values.append(item) + labels.append(str(item)) + + return values, labels diff --git a/table/fairspec_table/helpers/file_dialect.py b/table/fairspec_table/helpers/file_dialect.py new file mode 100644 index 0000000..7b85a81 --- /dev/null +++ b/table/fairspec_table/helpers/file_dialect.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from fairspec_table.models.file_dialect import FileDialectWithHeaderAndCommentRows + + +def get_header_rows( + file_dialect: FileDialectWithHeaderAndCommentRows | None = None, +) -> list[int]: + if file_dialect is None or file_dialect.headerRows is None: + return [1] + if file_dialect.headerRows is False: + return [] + return file_dialect.headerRows diff --git 
a/table/fairspec_table/helpers/general.py b/table/fairspec_table/helpers/general.py new file mode 100644 index 0000000..7ec55cd --- /dev/null +++ b/table/fairspec_table/helpers/general.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from typing import TypeGuard + + +def get_is_object(value: object) -> TypeGuard[dict[str, object]]: + return isinstance(value, dict) diff --git a/table/fairspec_table/helpers/schema.py b/table/fairspec_table/helpers/schema.py new file mode 100644 index 0000000..a840ee9 --- /dev/null +++ b/table/fairspec_table/helpers/schema.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import polars as pl + +from fairspec_table.models.column import PolarsColumn +from fairspec_table.models.schema import PolarsSchema + + +def get_polars_schema(type_mapping: dict[str, pl.DataType]) -> PolarsSchema: + columns = [ + PolarsColumn(name=name, type=type(dtype)) + for name, dtype in type_mapping.items() + ] + return PolarsSchema(columns=columns) diff --git a/table/fairspec_table/helpers/table.py b/table/fairspec_table/helpers/table.py new file mode 100644 index 0000000..56557d7 --- /dev/null +++ b/table/fairspec_table/helpers/table.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +import polars as pl + + +def evaluate_expression(expr: pl.Expr) -> object: + return pl.select(expr.alias("value")).to_dicts()[0]["value"] diff --git a/table/fairspec_table/models/__init__.py b/table/fairspec_table/models/__init__.py new file mode 100644 index 0000000..58d4b87 --- /dev/null +++ b/table/fairspec_table/models/__init__.py @@ -0,0 +1,30 @@ +from .cell import CellMapping +from .column import ColumnMapping, DenormalizeColumnOptions, PolarsColumn +from .data import DataRecord, DataRow +from .file_dialect import FileDialectWithHeaderAndCommentRows +from .frame import Frame +from .schema import ( + InferTableSchemaOptions, + PolarsSchema, + SchemaMapping, + TableSchemaOptions, +) +from .table import LoadTableOptions, SaveTableOptions, Table + 
+__all__ = [ + "CellMapping", + "ColumnMapping", + "DataRecord", + "DataRow", + "DenormalizeColumnOptions", + "FileDialectWithHeaderAndCommentRows", + "Frame", + "InferTableSchemaOptions", + "LoadTableOptions", + "PolarsColumn", + "PolarsSchema", + "SaveTableOptions", + "SchemaMapping", + "Table", + "TableSchemaOptions", +] diff --git a/table/fairspec_table/models/cell.py b/table/fairspec_table/models/cell.py new file mode 100644 index 0000000..6055a42 --- /dev/null +++ b/table/fairspec_table/models/cell.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import polars as pl + + +@dataclass +class CellMapping: + source: pl.Expr + target: pl.Expr diff --git a/table/fairspec_table/models/column.py b/table/fairspec_table/models/column.py new file mode 100644 index 0000000..4f1a45f --- /dev/null +++ b/table/fairspec_table/models/column.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TypedDict + +import polars as pl +from fairspec_metadata import Column + + +@dataclass +class PolarsColumn: + name: str + type: type[pl.DataType] + + +@dataclass +class ColumnMapping: + source: PolarsColumn + target: Column + + +class DenormalizeColumnOptions(TypedDict, total=False): + nativeTypes: list[str] diff --git a/table/fairspec_table/models/data.py b/table/fairspec_table/models/data.py new file mode 100644 index 0000000..8cf5be5 --- /dev/null +++ b/table/fairspec_table/models/data.py @@ -0,0 +1,4 @@ +from __future__ import annotations + +DataRow = list[object] +DataRecord = dict[str, object] diff --git a/table/fairspec_table/models/file_dialect.py b/table/fairspec_table/models/file_dialect.py new file mode 100644 index 0000000..ca248c9 --- /dev/null +++ b/table/fairspec_table/models/file_dialect.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from typing import Union + +from fairspec_metadata import ( + CsvFileDialect, + OdsFileDialect, + TsvFileDialect, + 
XlsxFileDialect, +) + +FileDialectWithHeaderAndCommentRows = Union[ + CsvFileDialect, OdsFileDialect, TsvFileDialect, XlsxFileDialect +] diff --git a/table/fairspec_table/models/frame.py b/table/fairspec_table/models/frame.py new file mode 100644 index 0000000..9f12076 --- /dev/null +++ b/table/fairspec_table/models/frame.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +import polars as pl + +Frame = pl.DataFrame diff --git a/table/fairspec_table/models/schema.py b/table/fairspec_table/models/schema.py new file mode 100644 index 0000000..91b9993 --- /dev/null +++ b/table/fairspec_table/models/schema.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal, TypedDict + +from fairspec_metadata import TableSchema + +from .column import PolarsColumn + + +@dataclass +class PolarsSchema: + columns: list[PolarsColumn] + + +@dataclass +class SchemaMapping: + source: PolarsSchema + target: TableSchema + + +class TableSchemaOptions(TypedDict, total=False): + columnNames: list[str] + columnTypes: dict[str, str] + missingValues: list[str] + decimalChar: str + groupChar: str + trueValues: list[str] + falseValues: list[str] + datetimeFormat: str + dateFormat: str + timeFormat: str + arrayType: Literal["array", "list"] + listDelimiter: str + listItemType: Literal[ + "string", + "number", + "boolean", + "date", + "date-time", + "integer", + "time", + ] + + +class InferTableSchemaOptions(TableSchemaOptions, total=False): + sampleRows: int + confidence: float + commaDecimal: bool + monthFirst: bool + keepStrings: bool diff --git a/table/fairspec_table/models/table.py b/table/fairspec_table/models/table.py new file mode 100644 index 0000000..6593856 --- /dev/null +++ b/table/fairspec_table/models/table.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Required + +import polars as pl +from fairspec_dataset import InferFileDialectOptions + +from .schema import InferTableSchemaOptions, 
TableSchemaOptions + +Table = pl.LazyFrame + + +class LoadTableOptions(InferFileDialectOptions, InferTableSchemaOptions, total=False): + previewBytes: int + denormalized: bool + + +class SaveTableOptions(TableSchemaOptions, total=False): + path: Required[str] + fileDialect: object + tableSchema: object + overwrite: bool diff --git a/table/fairspec_table/plugin.py b/table/fairspec_table/plugin.py new file mode 100644 index 0000000..24c3236 --- /dev/null +++ b/table/fairspec_table/plugin.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_dataset.plugin import DatasetPlugin + +if TYPE_CHECKING: + from fairspec_metadata import Resource + from fairspec_metadata import TableSchema + + from .models import ( + InferTableSchemaOptions, + LoadTableOptions, + SaveTableOptions, + Table, + ) + + +class TablePlugin(DatasetPlugin): + def load_table( + self, + resource: Resource, + **options: Unpack[LoadTableOptions], + ) -> Table | None: + return None + + def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + return None + + def infer_table_schema( + self, + resource: Resource, + **options: Unpack[InferTableSchemaOptions], + ) -> TableSchema | None: + return None diff --git a/table/fairspec_table/plugins/__init__.py b/table/fairspec_table/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/arrow/__init__.py b/table/fairspec_table/plugins/arrow/__init__.py new file mode 100644 index 0000000..e79ab93 --- /dev/null +++ b/table/fairspec_table/plugins/arrow/__init__.py @@ -0,0 +1,9 @@ +from .actions.table.load import load_arrow_table +from .actions.table.save import save_arrow_table +from .plugin import ArrowPlugin + +__all__ = [ + "ArrowPlugin", + "load_arrow_table", + "save_arrow_table", +] diff --git a/table/fairspec_table/plugins/arrow/actions/__init__.py b/table/fairspec_table/plugins/arrow/actions/__init__.py new file mode 
100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/arrow/actions/table/__init__.py b/table/fairspec_table/plugins/arrow/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/arrow/actions/table/load.py b/table/fairspec_table/plugins/arrow/actions/table/load.py new file mode 100644 index 0000000..272370c --- /dev/null +++ b/table/fairspec_table/plugins/arrow/actions/table/load.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +import polars as pl +from fairspec_dataset import prefetch_files +from fairspec_metadata import resolve_table_schema + +from fairspec_table.actions.table.normalize import normalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + from fairspec_table.models.table import LoadTableOptions, Table + + +def load_arrow_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table: + paths = prefetch_files(resource) + if not paths: + raise Exception("Resource data is not defined") + + first_path, *rest_paths = paths + table = pl.scan_ipc(first_path) + if rest_paths: + table = pl.concat([table, *(pl.scan_ipc(path) for path in rest_paths)]) + + if not options.get("denormalized"): + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + table_schema = infer_table_schema_from_table(table, **options) + table = normalize_table(table, table_schema) + + return table diff --git a/table/fairspec_table/plugins/arrow/actions/table/load_spec.py b/table/fairspec_table/plugins/arrow/actions/table/load_spec.py new file mode 100644 index 0000000..d69857d --- /dev/null +++ b/table/fairspec_table/plugins/arrow/actions/table/load_spec.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import 
ArrowFileDialect, Resource + +from .load import load_arrow_table + + +class TestLoadArrowTable: + def test_should_load_local_file(self): + path = get_temp_file_path() + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_ipc(path) + + table = load_arrow_table(Resource(data=path, fileDialect=ArrowFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_local_file_multipart(self): + path1 = get_temp_file_path() + path2 = get_temp_file_path() + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_ipc(path1) + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_ipc(path2) + + table = load_arrow_table(Resource(data=[path1, path2])) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] diff --git a/table/fairspec_table/plugins/arrow/actions/table/save.py b/table/fairspec_table/plugins/arrow/actions/table/save.py new file mode 100644 index 0000000..2bd2396 --- /dev/null +++ b/table/fairspec_table/plugins/arrow/actions/table/save.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_dataset import assert_local_path_vacant +from fairspec_metadata import TableSchema + +from fairspec_table.actions.table.denormalize import denormalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.plugins.arrow.settings import NATIVE_TYPES + +if TYPE_CHECKING: + from fairspec_table.models.table import SaveTableOptions, Table + + +def save_arrow_table(table: Table, **options: Unpack[SaveTableOptions]) -> str: + path = options["path"] + + if not options.get("overwrite"): + assert_local_path_vacant(path) + + table_schema 
= options.get("tableSchema") + if not isinstance(table_schema, TableSchema): + table_schema = infer_table_schema_from_table( + table, **options, keepStrings=True + ) + + table = denormalize_table(table, table_schema, nativeTypes=NATIVE_TYPES) + + table.sink_ipc(path) + + return path diff --git a/table/fairspec_table/plugins/arrow/actions/table/save_spec.py b/table/fairspec_table/plugins/arrow/actions/table/save_spec.py new file mode 100644 index 0000000..8b60197 --- /dev/null +++ b/table/fairspec_table/plugins/arrow/actions/table/save_spec.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from datetime import date, datetime, timezone +from zoneinfo import ZoneInfo + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import Resource + +from .load import load_arrow_table +from .save import save_arrow_table + + +class TestSaveArrowTable: + def test_should_save_table_to_arrow_file(self): + path = get_temp_file_path() + source = pl.DataFrame( + {"id": [1.0, 2.0, 3.0], "name": ["Alice", "Bob", "Charlie"]} + ).lazy() + + save_arrow_table(source, path=path) + + table = load_arrow_table(Resource(data=path)) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1.0, "name": "Alice"}, + {"id": 2.0, "name": "Bob"}, + {"id": 3.0, "name": "Charlie"}, + ] + + def test_should_save_and_load_various_data_types(self): + path = get_temp_file_path() + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", [date(2025, 1, 1)], dtype=pl.Date), + pl.Series( + "datetime", + [datetime(2025, 1, 1, tzinfo=timezone.utc)], + dtype=pl.Datetime, + ), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_arrow_table(source, path=path) + + target = load_arrow_table(Resource(data=path), denormalized=True) + frame: pl.DataFrame = 
target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": True, + "date": "2025-01-01", + "datetime": datetime(2025, 1, 1, tzinfo=ZoneInfo("UTC")), + "integer": 1, + "number": 1.1, + "string": "string", + }, + ] diff --git a/table/fairspec_table/plugins/arrow/plugin.py b/table/fairspec_table/plugins/arrow/plugin.py new file mode 100644 index 0000000..1f0c913 --- /dev/null +++ b/table/fairspec_table/plugins/arrow/plugin.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +from fairspec_metadata import Resource, get_supported_file_dialect +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from fairspec_table.plugin import TablePlugin + +from .actions.table.load import load_arrow_table +from .actions.table.save import save_arrow_table + +if TYPE_CHECKING: + from fairspec_table.models.table import LoadTableOptions, SaveTableOptions, Table + + +class ArrowPlugin(TablePlugin): + def load_table( + self, + resource: Resource, + **options: Unpack[LoadTableOptions], + ) -> Table | None: + file_dialect = get_supported_file_dialect(resource, ["arrow"]) + if not file_dialect: + return None + return load_arrow_table(resource, **options) + + def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + resource = Resource( + data=options["path"], fileDialect=cast(FileDialect | None, options.get("fileDialect")) + ) + file_dialect = get_supported_file_dialect(resource, ["arrow"]) + if not file_dialect: + return None + return save_arrow_table(table, **options) diff --git a/table/fairspec_table/plugins/arrow/plugin_spec.py b/table/fairspec_table/plugins/arrow/plugin_spec.py new file mode 100644 index 0000000..884e41e --- /dev/null +++ b/table/fairspec_table/plugins/arrow/plugin_spec.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import polars as pl +from fairspec_metadata 
import ArrowFileDialect, Resource + +from .plugin import ArrowPlugin + + +class TestArrowPluginLoadTable: + def setup_method(self): + self.plugin = ArrowPlugin() + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_load_table_from_arrow_file(self, mock_load: MagicMock): + resource = Resource(data="test.arrow") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + mock_load.assert_called_once_with(resource) + assert result is mock_table + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_load_table_from_feather_file(self, mock_load: MagicMock): + resource = Resource(data="test.feather") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + mock_load.assert_called_once_with(resource) + assert result is mock_table + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_return_none_for_non_arrow_files(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_handle_explicit_arrow_format(self, mock_load: MagicMock): + resource = Resource(data="test.txt", fileDialect=ArrowFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + mock_load.assert_called_once_with(resource) + assert result is mock_table + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_pass_through_load_options(self, mock_load: MagicMock): + resource = Resource(data="test.arrow") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + 
@patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_handle_paths_with_directories(self, mock_load: MagicMock): + resource = Resource(data="/path/to/data.arrow") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once_with(resource) + + @patch("fairspec_table.plugins.arrow.plugin.load_arrow_table") + def test_should_return_none_for_parquet_files(self, mock_load: MagicMock): + resource = Resource(data="test.parquet") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + +class TestArrowPluginSaveTable: + def setup_method(self): + self.plugin = ArrowPlugin() + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_save_table_to_arrow_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.arrow" + + result = self.plugin.save_table(table, path="output.arrow") + + mock_save.assert_called_once_with(table, path="output.arrow") + assert result == "output.arrow" + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_save_table_to_feather_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.feather" + + result = self.plugin.save_table(table, path="output.feather") + + mock_save.assert_called_once_with(table, path="output.feather") + assert result == "output.feather" + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_return_none_for_non_arrow_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.csv") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_handle_explicit_arrow_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = 
self.plugin.save_table(table, path="output.txt", fileDialect=ArrowFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=ArrowFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_handle_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.arrow" + + self.plugin.save_table(table, path="/path/to/output.arrow") + + mock_save.assert_called_once_with(table, path="/path/to/output.arrow") + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.arrow.plugin.save_arrow_table") + def test_should_return_none_for_parquet_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.parquet") + + mock_save.assert_not_called() + assert result is None diff --git a/table/fairspec_table/plugins/arrow/settings.py b/table/fairspec_table/plugins/arrow/settings.py new file mode 100644 index 0000000..50814bc --- /dev/null +++ b/table/fairspec_table/plugins/arrow/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["boolean", "integer", "number", "string", "list", "date-time"] diff --git a/table/fairspec_table/plugins/csv/__init__.py b/table/fairspec_table/plugins/csv/__init__.py new file mode 100644 index 0000000..3fd5964 --- /dev/null +++ b/table/fairspec_table/plugins/csv/__init__.py @@ -0,0 +1,11 @@ +from .actions.file_dialect.infer import infer_csv_file_dialect +from .actions.table.load import load_csv_table +from .actions.table.save import save_csv_table +from .plugin import CsvPlugin + +__all__ = [ + "CsvPlugin", + "infer_csv_file_dialect", + "load_csv_table", + 
"save_csv_table", +] diff --git a/table/fairspec_table/plugins/csv/actions/__init__.py b/table/fairspec_table/plugins/csv/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/csv/actions/file_dialect/__init__.py b/table/fairspec_table/plugins/csv/actions/file_dialect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/csv/actions/file_dialect/infer.py b/table/fairspec_table/plugins/csv/actions/file_dialect/infer.py new file mode 100644 index 0000000..28f21d9 --- /dev/null +++ b/table/fairspec_table/plugins/csv/actions/file_dialect/infer.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_dataset import load_file_stream +from fairspec_metadata import get_data_first_path +from fairspec_metadata.models.file_dialect.csv import CsvFileDialect +from fairspec_metadata.models.file_dialect.tsv import TsvFileDialect + +from fairspec_table.utils.sniffer.sniffer import Sniffer + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + from fairspec_table.models.table import LoadTableOptions + + +def infer_csv_file_dialect( + resource: Resource, + options: LoadTableOptions | None = None, +) -> CsvFileDialect | TsvFileDialect | None: + sample_bytes = 10_000 + + data_path = get_data_first_path(resource) + if not data_path: + return None + + stream = load_file_stream(data_path, max_bytes=sample_bytes) + raw_bytes = stream.read() + + sniffer = Sniffer() + try: + result = sniffer.sniff_bytes(raw_bytes) + except Exception: + return CsvFileDialect() + + lt = result.dialect.line_terminator + line_terminator = "\n" if lt == "LF" else "\r\n" if lt == "CRLF" else "\r" + + is_tsv = result.dialect.delimiter == 9 + + if is_tsv: + kwargs: dict[str, object] = {"lineTerminator": line_terminator} + + if result.dialect.header.has_header_row: + kwargs["headerRows"] = [result.dialect.header.num_preamble_rows + 1] + elif result.num_fields 
> 0: + kwargs["headerRows"] = False + + return TsvFileDialect(**kwargs) # type: ignore[arg-type] + + kwargs = { + "delimiter": chr(result.dialect.delimiter), + "lineTerminator": line_terminator, + } + + if result.dialect.quote.char is not None: + kwargs["quoteChar"] = chr(result.dialect.quote.char) + + if result.dialect.header.has_header_row: + kwargs["headerRows"] = [result.dialect.header.num_preamble_rows + 1] + elif result.num_fields > 0: + kwargs["headerRows"] = False + + return CsvFileDialect(**kwargs) # type: ignore[arg-type] diff --git a/table/fairspec_table/plugins/csv/actions/file_dialect/infer_spec.py b/table/fairspec_table/plugins/csv/actions/file_dialect/infer_spec.py new file mode 100644 index 0000000..986118a --- /dev/null +++ b/table/fairspec_table/plugins/csv/actions/file_dialect/infer_spec.py @@ -0,0 +1,236 @@ +from __future__ import annotations + +from fairspec_dataset import write_temp_file +from fairspec_metadata import CsvFileDialect, Resource, TsvFileDialect + +from .infer import infer_csv_file_dialect + + +class TestInferCsvFileDialectBasic: + def test_should_infer_simple_csv_file(self): + path = write_temp_file("id,name\n1,english\n2,中文") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_quote_char(self): + path = write_temp_file('id,name\n1,"John Doe"\n2,"Jane Smith"') + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", quoteChar='"', headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_quote_char_with_single_quotes(self): + path = write_temp_file("id,name\n1,'John Doe'\n2,'Jane Smith'") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", quoteChar="'", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_header_false_when_no_header(self): + path = 
write_temp_file("1,english\n2,中文\n3,español") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=False, lineTerminator="\n" + ) + + def test_should_detect_header_when_present(self): + path = write_temp_file("id,name\n1,english\n2,中文") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_complex_csv_with_quotes_and_header(self): + path = write_temp_file( + 'name,description\n"Product A","A great product with, commas"\n"Product B","Another product"' + ) + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", quoteChar='"', headerRows=[1], lineTerminator="\n" + ) + + +class TestInferCsvFileDialectDelimiters: + def test_should_infer_comma_delimiter(self): + path = write_temp_file("id,name,age\n1,alice,25\n2,bob,30") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_pipe_delimiter(self): + path = write_temp_file("id|name|age\n1|alice|25\n2|bob|30") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter="|", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_semicolon_delimiter(self): + path = write_temp_file("id;name;age\n1;alice;25\n2;bob;30") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=";", headerRows=[1], lineTerminator="\n" + ) + + def test_should_infer_tab_delimiter_as_tsv(self): + path = write_temp_file("id\tname\tage\n1\talice\t25\n2\tbob\t30") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == 
TsvFileDialect(headerRows=[1], lineTerminator="\n") + + +class TestInferCsvFileDialectQuotes: + def test_should_handle_quoted_fields(self): + path = write_temp_file( + 'id,name,description\n1,"alice","Description with, comma"\n2,"bob","Normal text"' + ) + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=",", quoteChar='"', headerRows=[1], lineTerminator="\n" + ) + + def test_should_handle_single_quote_character(self): + path = write_temp_file( + "id,name,description\n1,'alice','Description text'\n2,'bob','Normal text'" + ) + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=",", quoteChar="'", headerRows=[1], lineTerminator="\n" + ) + + +class TestInferCsvFileDialectEdgeCases: + def test_should_return_none_for_resources_without_path(self): + resource = Resource( + data=[ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ], + ) + + result = infer_csv_file_dialect(resource) + + assert result is None + + def test_should_handle_custom_line_terminator(self): + path = write_temp_file("id,name\r\n1,alice\r\n2,bob\r\n") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\r\n" + ) + + def test_should_handle_header_row_only(self): + path = write_temp_file("id,name,age") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect( + delimiter=",", headerRows=False, lineTerminator="\n" + ) + + def test_should_handle_empty_file(self): + path = write_temp_file("") + + result = infer_csv_file_dialect( + Resource(data=path, fileDialect=CsvFileDialect()) + ) + + assert result == CsvFileDialect(delimiter=",", lineTerminator="\n") + + +class TestInferCsvFileDialectHeaderDetection: + def 
test_should_detect_header_with_mixed_types(self): + path = write_temp_file("id,name,age\n1,Alice,25\n2,Bob,30\n3,Charlie,35") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\n" + ) + + def test_should_detect_header_after_preamble_rows(self): + path = write_temp_file( + "# Comment line 1\n# Comment line 2\nid,name,age\n1,Alice,25\n2,Bob,30" + ) + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[3], lineTerminator="\n" + ) + + def test_should_not_detect_header_when_first_row_is_numeric(self): + path = write_temp_file("1,2,3\n4,5,6\n7,8,9") + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=False, lineTerminator="\n" + ) + + def test_should_detect_header_with_underscores_and_mixed_case(self): + path = write_temp_file( + "user_id,User_Name,EmailAddress\n1,alice,alice@example.com\n2,bob,bob@example.com" + ) + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=[1], lineTerminator="\n" + ) + + def test_should_not_detect_header_when_first_row_has_data_like_values(self): + path = write_temp_file( + "blsrpxedd,37257,695.80,false,1927-11-07T01:03:54Z\n" + "zmvpq03o4,68694,337.73,false,1927-04-02T12:37:52Z\n" + "iw1fm3k9n,52019,988.74,false,2009-02-22T05:50:15Z" + ) + + result = infer_csv_file_dialect(Resource(data=path)) + + assert result == CsvFileDialect( + delimiter=",", headerRows=False, lineTerminator="\n" + ) diff --git a/table/fairspec_table/plugins/csv/actions/table/__init__.py b/table/fairspec_table/plugins/csv/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/csv/actions/table/load.py b/table/fairspec_table/plugins/csv/actions/table/load.py new file mode 100644 index 0000000..f72204f --- /dev/null +++ 
b/table/fairspec_table/plugins/csv/actions/table/load.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +import polars as pl + +from fairspec_dataset import prefetch_files +from fairspec_metadata import Resource, get_supported_file_dialect, resolve_table_schema + +from fairspec_table.actions.table.file_dialect import ( + join_header_rows, + skip_comment_rows, +) +from fairspec_table.actions.table.normalize import normalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.helpers.file_dialect import get_header_rows +from fairspec_table.plugins.csv.actions.file_dialect.infer import infer_csv_file_dialect + +if TYPE_CHECKING: + from fairspec_metadata import CsvFileDialect, TsvFileDialect + from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + + from fairspec_table.models.table import LoadTableOptions, Table + + +def load_csv_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table: + file_dialect = get_supported_file_dialect(resource, ["csv", "tsv"]) + if not file_dialect: + raise Exception("Resource data is not compatible") + + max_bytes = options.get("previewBytes") + paths = prefetch_files(resource, max_bytes=max_bytes) + if not paths: + raise Exception("Resource path is not defined") + + if _dialect_has_only_format(file_dialect): + inferred = infer_csv_file_dialect( + Resource(data=paths[0], fileDialect=cast("FileDialect", file_dialect)) + ) + if inferred: + file_dialect = inferred + + scan_options = _get_scan_options(file_dialect) + + tables: list[Table] = [] + for path in paths: + table = pl.scan_csv(path, **scan_options) # type: ignore[arg-type] + tables.append(table) + + result = pl.concat(tables) + + has_header = scan_options.get("has_header", True) + column_names: list[str] | None = getattr(file_dialect, "columnNames", None) + if not has_header and not column_names: + result = result.rename( + { 
+ name: name.replace("column_", "column") + for name in result.collect_schema().names() + } + ) + + header_rows = get_header_rows(file_dialect) # type: ignore[arg-type] + if len(header_rows) >= 2: + result = join_header_rows(result, file_dialect) # type: ignore[arg-type] + if getattr(file_dialect, "commentRows", None): + result = skip_comment_rows(result, file_dialect) # type: ignore[arg-type] + + if not options.get("denormalized"): + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + table_schema = infer_table_schema_from_table(result, **options) + result = normalize_table(result, table_schema) + + return result + + +def _get_scan_options( + file_dialect: CsvFileDialect + | TsvFileDialect + | FileDialect + | None, +) -> dict[str, object]: + header_rows = get_header_rows(file_dialect) # type: ignore[arg-type] + + options: dict[str, object] = { + "infer_schema_length": 0, + "truncate_ragged_lines": True, + } + + options["skip_rows"] = header_rows[0] - 1 if header_rows else 0 + options["has_header"] = len(header_rows) > 0 + options["eol_char"] = getattr(file_dialect, "lineTerminator", None) or "\n" + + is_csv = getattr(file_dialect, "format", "csv") == "csv" + if is_csv: + options["separator"] = getattr(file_dialect, "delimiter", None) or "," + options["quote_char"] = getattr(file_dialect, "quoteChar", None) or '"' + else: + options["separator"] = "\t" + options["quote_char"] = None + + null_sequence = getattr(file_dialect, "nullSequence", None) + if null_sequence is not None: + options["null_values"] = null_sequence + + comment_prefix = getattr(file_dialect, "commentPrefix", None) + if comment_prefix is not None: + options["comment_prefix"] = comment_prefix + + column_names: list[str] | None = getattr(file_dialect, "columnNames", None) + if column_names: + options["schema"] = {name: pl.String for name in column_names} + options["has_header"] = False + + return options + + +def _dialect_has_only_format(dialect: FileDialect) -> bool: + 
keys = { + k + for k in type(dialect).model_fields + if getattr(dialect, k, None) is not None + } + meaningful = keys - {"format", "type", "title", "description"} + return len(meaningful) == 0 diff --git a/table/fairspec_table/plugins/csv/actions/table/load_spec.py b/table/fairspec_table/plugins/csv/actions/table/load_spec.py new file mode 100644 index 0000000..6c1a096 --- /dev/null +++ b/table/fairspec_table/plugins/csv/actions/table/load_spec.py @@ -0,0 +1,289 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import write_temp_file +from fairspec_metadata import CsvFileDialect, Resource, TsvFileDialect + +from .load import load_csv_table + + +class TestLoadCsvTable: + def test_should_load_local_file(self): + path = write_temp_file("id,name\n1,english\n2,中文") + + table = load_csv_table(Resource(data=path, fileDialect=CsvFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_local_file_multipart(self): + path1 = write_temp_file("id,name\n1,english") + path2 = write_temp_file("id,name\n2,中文\n3,german") + + table = load_csv_table( + Resource(data=[path1, path2], fileDialect=CsvFileDialect()) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 3, "name": "german"}, + ] + + def test_should_load_remote_file_with_preview_bytes(self): + table = load_csv_table( + Resource( + data="https://raw.githubusercontent.com/fairspec/fairspec-typescript/refs/heads/main/table/plugins/csv/actions/table/fixtures/table.csv", + ), + previewBytes=18, + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + ] + + def test_should_handle_custom_delimiter(self): + path = 
write_temp_file("id|name\n1|alice\n2|bob") + + table = load_csv_table( + Resource(data=path, fileDialect=CsvFileDialect(delimiter="|")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_handle_files_without_header(self): + path = write_temp_file("1,alice\n2,bob") + + table = load_csv_table( + Resource(data=path, fileDialect=CsvFileDialect(headerRows=False)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"column1": 1, "column2": "alice"}, + {"column1": 2, "column2": "bob"}, + ] + + def test_should_handle_files_without_header_using_column_names(self): + path = write_temp_file("1,alice\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect( + headerRows=False, columnNames=["id", "name"] + ), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_infer_header_rows_when_partial_dialect(self): + path = write_temp_file("1,100\n2,200\n3,300") + + table = load_csv_table(Resource(data=path, fileDialect=CsvFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"column1": 1, "column2": 100}, + {"column1": 2, "column2": 200}, + {"column1": 3, "column2": 300}, + ] + + def test_should_handle_custom_line_terminator(self): + path = write_temp_file("id,name|1,alice|2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(lineTerminator="|"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_handle_custom_quote_character(self): + path = 
write_temp_file("id,name\n1,'alice smith'\n2,'bob jones'") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(quoteChar="'"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice smith"}, + {"id": 2, "name": "bob jones"}, + ] + + def test_should_handle_comment_character(self): + path = write_temp_file( + "# This is a comment\nid,name\n1,alice\n# Another comment\n2,bob" + ) + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(commentPrefix="#"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_support_header_rows(self): + path = write_temp_file("#comment\nid,name\n1,alice\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(headerRows=[2]), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_support_header_join(self): + path = write_temp_file("#comment\nid,name\nint,str\n1,alice\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(headerRows=[2, 3], headerJoin="_"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id_int": 1, "name_str": "alice"}, + {"id_int": 2, "name_str": "bob"}, + ] + + def test_should_support_comment_rows(self): + path = write_temp_file("id,name\n1,alice\ncomment\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(commentRows=[3]), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def 
test_should_support_header_rows_and_comment_rows(self): + path = write_temp_file("#comment\nid,name\n1,alice\n#comment\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(headerRows=[2], commentRows=[4]), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice"}, + {"id": 2, "name": "bob"}, + ] + + def test_should_support_header_join_and_comment_rows(self): + path = write_temp_file("#comment\nid,name\nint,str\n1,alice\n#comment\n2,bob") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect( + headerRows=[2, 3], headerJoin="_", commentRows=[5] + ), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id_int": 1, "name_str": "alice"}, + {"id_int": 2, "name_str": "bob"}, + ] + + def test_should_handle_null_sequence(self): + path = write_temp_file("id,name,age\n1,alice,25\n2,N/A,30\n3,bob,N/A") + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect(nullSequence="N/A"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "alice", "age": 25}, + {"id": 2, "name": None, "age": 30}, + {"id": 3, "name": "bob", "age": None}, + ] + + def test_should_handle_multiple_format_options_together(self): + path = write_temp_file( + "#comment\nid|'full name'|age\n1|'alice smith'|25\n2|'bob jones'|30" + ) + + table = load_csv_table( + Resource( + data=path, + fileDialect=CsvFileDialect( + delimiter="|", + quoteChar="'", + commentPrefix="#", + headerRows=[1], + ), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "full name": "alice smith", "age": 25}, + {"id": 2, "full name": "bob jones", "age": 30}, + ] + + +class TestLoadCsvTableTsv: + def test_should_load_local_file(self): + path = 
write_temp_file("id\tname\n1\tenglish\n2\t中文") + + table = load_csv_table(Resource(data=path, fileDialect=TsvFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] diff --git a/table/fairspec_table/plugins/csv/actions/table/save.py b/table/fairspec_table/plugins/csv/actions/table/save.py new file mode 100644 index 0000000..e202cfd --- /dev/null +++ b/table/fairspec_table/plugins/csv/actions/table/save.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +import polars as pl + +from fairspec_dataset import assert_local_path_vacant +from fairspec_metadata import Resource, TableSchema, get_supported_file_dialect + +from fairspec_table.actions.table.denormalize import denormalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.plugins.csv.settings import NATIVE_TYPES + +if TYPE_CHECKING: + from fairspec_table.models.table import SaveTableOptions, Table + + +def save_csv_table(table: Table, **options: Unpack[SaveTableOptions]) -> str: + path = options["path"] + + if not options.get("overwrite"): + assert_local_path_vacant(path) + + resource = Resource(data=path, fileDialect=options.get("fileDialect")) + file_dialect = get_supported_file_dialect(resource, ["csv", "tsv"]) + if not file_dialect: + raise Exception("Saving options is not compatible") + + header_rows_value = getattr(file_dialect, "headerRows", None) + if isinstance(file_dialect, dict): + header_rows_value = file_dialect.get("headerRows") + if header_rows_value is None: + header_rows = [1] + elif header_rows_value is False: + header_rows = [] + else: + header_rows = header_rows_value + + table_schema = options.get("tableSchema") + if not isinstance(table_schema, TableSchema): + table_schema = infer_table_schema_from_table( + table, **options, keepStrings=True + ) + 
+ table = denormalize_table(table, table_schema, nativeTypes=NATIVE_TYPES) + + is_csv = getattr(file_dialect, "format", "csv") == "csv" + + sink_options: dict[str, object] = { + "include_header": len(header_rows) > 0, + "line_terminator": getattr(file_dialect, "lineTerminator", None) or "\n", + } + + if is_csv: + sink_options["separator"] = getattr(file_dialect, "delimiter", None) or "," + sink_options["quote_char"] = getattr(file_dialect, "quoteChar", None) or '"' + else: + sink_options["separator"] = "\t" + sink_options["quote_char"] = '"' + + frame = cast("pl.DataFrame", table.collect()) + frame.write_csv(path, **sink_options) # type: ignore[arg-type] + + return path diff --git a/table/fairspec_table/plugins/csv/actions/table/save_spec.py b/table/fairspec_table/plugins/csv/actions/table/save_spec.py new file mode 100644 index 0000000..67860a5 --- /dev/null +++ b/table/fairspec_table/plugins/csv/actions/table/save_spec.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import CsvFileDialect, Resource, TsvFileDialect + +from .load import load_csv_table +from .save import save_csv_table + +ROW1 = {"id": 1.0, "name": "Alice"} +ROW2 = {"id": 2.0, "name": "Bob"} +ROW3 = {"id": 3.0, "name": "Charlie"} +TABLE = pl.DataFrame([ROW1, ROW2, ROW3]).lazy() + + +class TestSaveCsvTable: + def test_should_save_table_to_file(self): + path = get_temp_file_path() + + save_csv_table(TABLE, path=path, fileDialect=CsvFileDialect()) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "id,name\n1.0,Alice\n2.0,Bob\n3.0,Charlie\n" + + def test_should_save_with_custom_delimiter(self): + path = get_temp_file_path() + + save_csv_table( + TABLE, + path=path, + fileDialect=CsvFileDialect(delimiter=";"), + ) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "id;name\n1.0;Alice\n2.0;Bob\n3.0;Charlie\n" + + def 
test_should_save_without_header(self): + path = get_temp_file_path() + + save_csv_table( + TABLE, + path=path, + fileDialect=CsvFileDialect(headerRows=False), + ) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "1.0,Alice\n2.0,Bob\n3.0,Charlie\n" + + def test_should_save_with_custom_quote_char(self): + path = get_temp_file_path() + + table = pl.DataFrame( + { + "id": [1.0, 2.0, 3.0], + "name": ["Alice,Smith", "Bob,Jones", "Charlie,Brown"], + } + ).lazy() + + save_csv_table( + table, + path=path, + fileDialect=CsvFileDialect(quoteChar="'"), + ) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == ( + "id,name\n1.0,'Alice,Smith'\n2.0,'Bob,Jones'\n3.0,'Charlie,Brown'\n" + ) + + def test_should_save_and_load_various_data_types(self): + path = get_temp_file_path(format="csv") + + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", ["2025-01-01"], dtype=pl.String), + pl.Series("datetime", ["2025-01-01T00:00:00"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_csv_table(source, path=path) + + target = load_csv_table(Resource(data=path), denormalized=True) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": "true", + "date": "2025-01-01", + "datetime": "2025-01-01T00:00:00", + "integer": "1", + "number": "1.1", + "string": "string", + }, + ] + + +class TestSaveCsvTableTsv: + def test_should_save_table_to_file(self): + path = get_temp_file_path() + + save_csv_table(TABLE, path=path, fileDialect=TsvFileDialect()) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "id\tname\n1.0\tAlice\n2.0\tBob\n3.0\tCharlie\n" diff --git a/table/fairspec_table/plugins/csv/plugin.py b/table/fairspec_table/plugins/csv/plugin.py new file 
mode 100644 index 0000000..4a59525 --- /dev/null +++ b/table/fairspec_table/plugins/csv/plugin.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +from fairspec_metadata import Resource, get_supported_file_dialect +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from fairspec_table.plugin import TablePlugin + +from .actions.file_dialect.infer import infer_csv_file_dialect +from .actions.table.load import load_csv_table +from .actions.table.save import save_csv_table + +if TYPE_CHECKING: + from fairspec_dataset.models.file_dialect import InferFileDialectOptions + + from fairspec_table.models.table import LoadTableOptions, SaveTableOptions, Table + + +class CsvPlugin(TablePlugin): + def load_table( + self, + resource: Resource, + **options: Unpack[LoadTableOptions], + ) -> Table | None: + file_dialect = get_supported_file_dialect(resource, ["csv", "tsv"]) + if not file_dialect: + return None + + return load_csv_table( + resource.model_copy(update={"fileDialect": file_dialect}), **options + ) + + def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + resource = Resource( + data=options["path"], fileDialect=cast(FileDialect | None, options.get("fileDialect")) + ) + file_dialect = get_supported_file_dialect(resource, ["csv", "tsv"]) + if not file_dialect: + return None + return save_csv_table(table, **options) + + def infer_file_dialect( + self, + resource: Resource, + **options: Unpack[InferFileDialectOptions], + ) -> FileDialect | None: + file_dialect = get_supported_file_dialect(resource, ["csv", "tsv"]) + if not file_dialect: + return None + return infer_csv_file_dialect(resource) diff --git a/table/fairspec_table/plugins/csv/plugin_spec.py b/table/fairspec_table/plugins/csv/plugin_spec.py new file mode 100644 index 0000000..2cc30cb --- /dev/null +++ b/table/fairspec_table/plugins/csv/plugin_spec.py @@ -0,0 +1,152 @@ +from __future__ import 
annotations + +from unittest.mock import MagicMock, patch + +import polars as pl +from fairspec_metadata import CsvFileDialect, Resource, TsvFileDialect + +from .plugin import CsvPlugin + + +class TestCsvPluginLoadTable: + def setup_method(self): + self.plugin = CsvPlugin() + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_load_table_from_csv_file(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_load_table_from_tsv_file(self, mock_load: MagicMock): + resource = Resource(data="test.tsv") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_return_none_for_non_csv_files(self, mock_load: MagicMock): + resource = Resource(data="test.json") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_handle_explicit_csv_format(self, mock_load: MagicMock): + resource = Resource(data="test.txt", fileDialect=CsvFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_pass_through_load_options(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def 
test_should_handle_paths_with_directories(self, mock_load: MagicMock): + resource = Resource(data="/path/to/data.csv") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.csv.plugin.load_csv_table") + def test_should_handle_explicit_tsv_format(self, mock_load: MagicMock): + resource = Resource(data="test.txt", fileDialect=TsvFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + +class TestCsvPluginSaveTable: + def setup_method(self): + self.plugin = CsvPlugin() + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_save_table_to_csv_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.csv" + + result = self.plugin.save_table(table, path="output.csv") + + mock_save.assert_called_once_with(table, path="output.csv") + assert result == "output.csv" + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_save_table_to_tsv_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.tsv" + + result = self.plugin.save_table(table, path="output.tsv") + + mock_save.assert_called_once_with(table, path="output.tsv") + assert result == "output.tsv" + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_return_none_for_non_csv_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.json") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_handle_explicit_csv_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", 
fileDialect=CsvFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=CsvFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_handle_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.csv" + + self.plugin.save_table(table, path="/path/to/output.csv") + + mock_save.assert_called_once_with(table, path="/path/to/output.csv") + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.csv.plugin.save_csv_table") + def test_should_handle_explicit_tsv_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", fileDialect=TsvFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=TsvFileDialect()) + assert result == "output.txt" diff --git a/table/fairspec_table/plugins/csv/settings.py b/table/fairspec_table/plugins/csv/settings.py new file mode 100644 index 0000000..73e9eef --- /dev/null +++ b/table/fairspec_table/plugins/csv/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["string"] diff --git a/table/fairspec_table/plugins/inline/__init__.py b/table/fairspec_table/plugins/inline/__init__.py new file mode 100644 index 0000000..349881b --- /dev/null +++ b/table/fairspec_table/plugins/inline/__init__.py @@ -0,0 +1,7 @@ +from .actions.table.load import load_inline_table +from .plugin import InlinePlugin + +__all__ = [ + "InlinePlugin", + "load_inline_table", +] diff --git a/table/fairspec_table/plugins/inline/actions/__init__.py 
# table/fairspec_table/plugins/inline/actions/table/load.py
from __future__ import annotations

from typing import TYPE_CHECKING, Unpack

import polars as pl
from fairspec_metadata import Resource, get_data_records, resolve_table_schema

from fairspec_table.actions.table.normalize import normalize_table
from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table

if TYPE_CHECKING:
    from fairspec_table.models.table import LoadTableOptions, Table


def load_inline_table(resource: Resource, **options: Unpack[LoadTableOptions]) -> Table:
    """Build a lazy polars table from a resource's inline data records.

    Raises:
        Exception: if the resource carries no inline tabular data.
    """
    records = get_data_records(resource)
    if not records:
        raise Exception("Resource data is not defined or tabular")

    table = pl.DataFrame(records).lazy()

    if options.get("denormalized"):
        return table

    # Prefer the declared schema; otherwise infer one from the data itself.
    schema = resolve_table_schema(resource.tableSchema)
    if not schema:
        schema = infer_table_schema_from_table(table, **options)
    return normalize_table(table, schema)
fairspec_metadata import StringColumnProperty +from fairspec_metadata import Resource +from fairspec_metadata import TableSchema + +from .load import load_inline_table + + +class TestLoadInlineTable: + def test_should_raise_on_no_data(self): + resource = Resource(name="test") + + with pytest.raises(Exception, match="Resource data is not defined or tabular"): + load_inline_table(resource) + + def test_should_raise_on_bad_data(self): + resource = Resource(name="test", data="bad") + + with pytest.raises(Exception, match="Resource data is not defined or tabular"): + load_inline_table(resource) + + def test_should_read_table_data(self): + resource = Resource( + name="test", + data=[ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ], + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_handle_longer_rows(self): + resource = Resource( + data=[ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文", "extra": "bad"}, + ], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + }, + ), + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_handle_shorter_rows(self): + resource = Resource( + name="test", + data=[{"id": 1, "name": "english"}, {"id": 2}], + tableSchema=TableSchema( + properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + }, + ), + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + {"id": 
1, "name": "english"}, + {"id": 2, "name": None}, + ] + + def test_should_handle_various_data_types(self): + resource = Resource( + data=[ + { + "string": "string", + "number": 1, + "boolean": True, + "date": datetime(2025, 1, 1), + "time": datetime(2025, 1, 1), + "datetime": datetime(2025, 1, 1), + }, + ], + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + { + "string": "string", + "number": 1, + "boolean": True, + "date": datetime(2025, 1, 1), + "time": datetime(2025, 1, 1), + "datetime": datetime(2025, 1, 1), + }, + ] + + def test_should_handle_objects_with_shorter_rows(self): + resource = Resource( + data=[ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 3}, + ], + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 3, "name": None}, + ] + + def test_should_handle_objects_with_longer_rows(self): + resource = Resource( + data=[ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 3, "name": "german", "extra": "extra"}, + ], + ) + + table = load_inline_table(resource) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278 + + assert frame.to_dicts() == [ + {"id": 1, "name": "english", "extra": None}, + {"id": 2, "name": "中文", "extra": None}, + {"id": 3, "name": "german", "extra": "extra"}, + ] diff --git a/table/fairspec_table/plugins/inline/plugin.py b/table/fairspec_table/plugins/inline/plugin.py new file mode 100644 index 0000000..16eb752 --- /dev/null +++ b/table/fairspec_table/plugins/inline/plugin.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from 
# table/fairspec_table/plugins/json/actions/buffer/decode.py
def decode_json_buffer(data: bytes, *, is_lines: bool) -> object:
    """Parse a JSON or JSON Lines byte buffer.

    Args:
        data: Raw file contents encoded as UTF-8 (a leading BOM is tolerated).
        is_lines: True to parse newline-delimited JSON (one document per line),
            False to parse the buffer as a single JSON document.

    Returns:
        The parsed value; for JSON Lines, a list with one item per non-blank line.

    Raises:
        json.JSONDecodeError: if any document is not valid JSON.
        UnicodeDecodeError: if the buffer is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged but also strips a leading BOM,
    # which json.loads would otherwise reject with a JSONDecodeError.
    text = data.decode("utf-8-sig")
    if is_lines:
        # Split on "\n" only (not str.splitlines): JSON strings may legally
        # contain raw U+2028/U+2029, which splitlines would treat as breaks.
        return [json.loads(line) for line in text.split("\n") if line.strip()]
    return json.loads(text)
# table/fairspec_table/plugins/json/actions/buffer/encode.py
def encode_json_buffer(data: object, *, is_lines: bool) -> bytes:
    """Serialize a value to UTF-8 JSON bytes.

    Args:
        data: The value to serialize; must be a list when is_lines is True.
        is_lines: True to emit newline-delimited JSON (one document per list
            item, no indentation), False to emit a single pretty-printed
            (2-space indented) JSON document.

    Returns:
        The UTF-8 encoded serialization; non-ASCII characters are kept literal.
    """
    if not is_lines:
        return json.dumps(data, indent=2, ensure_ascii=False).encode("utf-8")
    assert isinstance(data, list)
    documents = [json.dumps(row, ensure_ascii=False) for row in data]
    return "\n".join(documents).encode("utf-8")
# table/fairspec_table/plugins/json/actions/file_dialect/infer.py
from __future__ import annotations

from typing import TYPE_CHECKING, cast

from fairspec_dataset import load_file, load_file_stream
from fairspec_metadata import get_data_first_path, get_supported_file_dialect
from fairspec_metadata.models.file_dialect.common import RowType
from fairspec_metadata.models.file_dialect.json import JsonFileDialect
from fairspec_metadata.models.file_dialect.jsonl import JsonlFileDialect

from fairspec_table.plugins.json.actions.buffer.decode import decode_json_buffer
from fairspec_table.utils.sniffer.sniffer import Sniffer

if TYPE_CHECKING:
    from fairspec_metadata import Resource

    from fairspec_table.models.table import LoadTableOptions


def infer_json_file_dialect(
    resource: Resource,
    options: LoadTableOptions | None = None,
) -> JsonFileDialect | JsonlFileDialect | None:
    """Sniff a JSON/JSONL file and describe its layout as a file dialect.

    Detects the row type (arrays vs objects), header rows for array rows,
    and — for plain JSON — a top-level key holding the tabular data
    (exposed as jsonPointer).

    Args:
        resource: Resource whose first data path points at a local JSON/JSONL file.
        options: Currently unused; kept for interface compatibility.

    Returns:
        A dialect carrying only the detected fields, a bare format-only
        dialect when the file cannot be read or parsed, or None when the
        resource has no file path or an unsupported dialect.
    """
    data_path = get_data_first_path(resource)
    if not data_path:
        return None

    dialect = get_supported_file_dialect(resource, ["json", "jsonl"])
    if not dialect:
        return None

    # Annotated as optional: getattr/get can legitimately yield None here.
    format: str | None = getattr(dialect, "format", None) or (
        # NOTE(review): dict-shaped dialects — confirm this branch is reachable.
        dialect.get("format") if isinstance(dialect, dict) else None
    )

    try:
        if format == "json":
            json_buffer = load_file(data_path)
        else:
            # For JSONL a bounded prefix is enough to sniff the layout.
            stream = load_file_stream(data_path, max_bytes=10000)
            json_buffer = stream.read()
        parsed = decode_json_buffer(json_buffer, is_lines=format == "jsonl")
    except Exception:
        # Unreadable or unparsable input: report only the format.
        return _result(format)

    json_pointer: str | None = None
    row_type: RowType | None = None
    header_rows: list[int] | bool | None = None

    data = parsed
    if isinstance(data, dict):
        # Top-level object: treat its first list-valued member as the table.
        for key, value in data.items():
            if isinstance(value, list):
                if format == "json":
                    json_pointer = str(key)
                data = value
                break

    if not isinstance(data, list) or len(data) == 0:
        return _result(format, json_pointer, row_type, header_rows)

    first_element = data[0]
    if isinstance(first_element, list):
        row_type = RowType.array
    elif isinstance(first_element, dict):
        row_type = RowType.object
    else:
        # Scalar rows: nothing more to detect.
        return _result(format, json_pointer, row_type, header_rows)

    if row_type == RowType.array:
        # Sample a bounded number of rows for header detection.
        rows = cast("list[list[object]]", data[:100])
        try:
            detection = Sniffer().sniff_rows(rows)
        except Exception:
            return _result(format, json_pointer, row_type, header_rows)

        if detection.dialect.header.has_header_row:
            header_rows = [detection.dialect.header.num_preamble_rows + 1]
        elif detection.num_fields > 0:
            header_rows = False

    return _result(format, json_pointer, row_type, header_rows)


def _result(
    format: str | None,
    json_pointer: str | None = None,
    row_type: RowType | None = None,
    header_rows: list[int] | bool | None = None,
) -> JsonFileDialect | JsonlFileDialect:
    """Dispatch to the dialect builder matching the sniffed format.

    With all-None detections this yields a bare dialect, so it also serves
    as the error fallback (replaces four duplicated if/return pairs).
    """
    if format == "json":
        return _build_json_dialect(json_pointer, row_type, header_rows)
    return _build_jsonl_dialect(row_type, header_rows)


def _build_json_dialect(
    json_pointer: str | None,
    row_type: RowType | None,
    header_rows: list[int] | bool | None,
) -> JsonFileDialect:
    """Create a JsonFileDialect carrying only the explicitly detected fields."""
    pairs = (
        ("jsonPointer", json_pointer),
        ("rowType", row_type),
        ("headerRows", header_rows),
    )
    fields = {key: value for key, value in pairs if value is not None}
    return JsonFileDialect(**fields)  # type: ignore[arg-type]


def _build_jsonl_dialect(
    row_type: RowType | None,
    header_rows: list[int] | bool | None,
) -> JsonlFileDialect:
    """Create a JsonlFileDialect carrying only the explicitly detected fields."""
    pairs = (("rowType", row_type), ("headerRows", header_rows))
    fields = {key: value for key, value in pairs if value is not None}
    return JsonlFileDialect(**fields)  # type: ignore[arg-type]
TestInferJsonFileDialectJsonArrayOfArrays: + def test_should_detect_headers_when_first_row_is_header(self): + body = '[["id", "name"], [1, "Alice"], [2, "Bob"]]' + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect(rowType=RowType.array, headerRows=[1]) + + def test_should_detect_no_headers_when_data_rows_only(self): + body = "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]" + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect(rowType=RowType.array, headerRows=False) + + +class TestInferJsonFileDialectNestedJson: + def test_should_detect_json_pointer_for_nested_data_structure(self): + body = '{"metadata": {"version": "1.0"}, "data": [["id", "name"], [1, "Alice"], [2, "Bob"]]}' + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert isinstance(result, JsonFileDialect) + assert result.jsonPointer == "data" + assert result.rowType == RowType.array + + def test_should_detect_json_pointer_with_array_of_objects(self): + body = '{"metadata": {"version": "1.0"}, "records": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]}' + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect(jsonPointer="records", rowType=RowType.object) + + +class TestInferJsonFileDialectJsonl: + def test_should_detect_row_type_for_array_jsonl(self): + body = '[1, "Alice", 30]\n[2, "Bob", 25]\n[3, "Charlie", 35]' + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonlFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonlFileDialect(rowType=RowType.array, headerRows=False) + + 
def test_should_detect_row_type_for_object_jsonl(self): + body = '{"id": 1, "name": "Alice"}\n{"id": 2, "name": "Bob"}\n{"id": 3, "name": "Charlie"}' + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonlFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonlFileDialect(rowType=RowType.object) + + +class TestInferJsonFileDialectEdgeCases: + def test_should_return_format_only_for_empty_array(self): + body = "[]" + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect() + + def test_should_return_format_only_for_invalid_json(self): + body = "{this is not valid json" + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect() + + def test_should_return_none_for_inline_data(self): + resource = Resource( + data=[ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, + ], + fileDialect=JsonFileDialect(), + ) + + result = infer_json_file_dialect(resource) + + assert result is None + + def test_should_return_none_for_unsupported_format(self): + body = "id,name\n1,Alice\n2,Bob" + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=CsvFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result is None + + def test_should_return_format_only_for_single_row_array(self): + body = "[[1, 2, 3]]" + path = write_temp_file(body) + + resource = Resource(data=path, fileDialect=JsonFileDialect()) + + result = infer_json_file_dialect(resource) + + assert result == JsonFileDialect(rowType=RowType.array, headerRows=False) + + def test_should_handle_non_file_path_errors_gracefully(self): + resource = Resource( + data="/nonexistent/path/to/file.json", + fileDialect=JsonFileDialect(), + ) + + result = infer_json_file_dialect(resource) + + 
# table/fairspec_table/plugins/json/actions/table/load.py
from __future__ import annotations

from typing import TYPE_CHECKING, Unpack, cast

import polars as pl

from fairspec_dataset import load_file, prefetch_files
from fairspec_metadata import Resource, get_supported_file_dialect, resolve_table_schema

from fairspec_table.actions.table.normalize import normalize_table
from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table
from fairspec_table.plugins.json.actions.buffer.decode import decode_json_buffer
from fairspec_table.plugins.json.actions.file_dialect.infer import (
    infer_json_file_dialect,
)

if TYPE_CHECKING:
    from fairspec_metadata import JsonFileDialect, JsonlFileDialect
    from fairspec_metadata.models.file_dialect.file_dialect import FileDialect

    from fairspec_table.models.table import LoadTableOptions, Table


def load_json_table(resource: Resource, **options: Unpack[LoadTableOptions]) -> Table:
    """Load a JSON or JSON Lines resource as a lazy polars table.

    Raises:
        Exception: if the resource's dialect is not JSON/JSONL or it has
            no data paths.
    """
    file_dialect = get_supported_file_dialect(resource, ["json", "jsonl"])
    if not file_dialect:
        raise Exception("Resource data is not compatible")

    is_lines = getattr(file_dialect, "format", None) == "jsonl"
    # previewBytes only applies to line-delimited input, where a prefix of
    # the file is still a valid sequence of documents.
    max_bytes = options.get("previewBytes") if is_lines else None
    paths = prefetch_files(resource, max_bytes=max_bytes)
    if not paths:
        raise Exception("Resource data is not defined")

    # A dialect that only names the format carries no parsing hints yet:
    # sniff the first file to fill in rowType/headerRows/jsonPointer.
    if _is_default_dialect(file_dialect):
        inferred = infer_json_file_dialect(
            Resource(data=paths[0], fileDialect=cast("FileDialect", file_dialect))
        )
        if inferred:
            file_dialect = inferred

    # Re-evaluate after inference, which may have refined the dialect.
    is_lines = getattr(file_dialect, "format", None) == "jsonl"
    is_default = _is_default_dialect(file_dialect)

    tables: list[Table] = []
    for path in paths:
        if is_lines and is_default:
            # Fast path: polars streams plain NDJSON directly.
            tables.append(pl.scan_ndjson(path))
            continue

        raw: object = decode_json_buffer(load_file(path), is_lines=is_lines)
        if not is_default:
            raw = _process_data(raw, file_dialect)
        tables.append(pl.DataFrame(raw).lazy())

    result = pl.concat(tables)

    if not options.get("denormalized"):
        table_schema = resolve_table_schema(resource.tableSchema)
        if not table_schema:
            table_schema = infer_table_schema_from_table(result, **options)
        result = normalize_table(result, table_schema)

    return result


def _is_default_dialect(dialect: FileDialect) -> bool:
    """True when the dialect sets nothing beyond format/type/title/description.

    Single source of truth for "format-only" dialect detection (previously
    duplicated as _dialect_has_only_format with identical semantics).
    """
    for key in type(dialect).model_fields:
        if key in ("format", "type", "title", "description"):
            continue
        if getattr(dialect, key, None) is not None:
            return False
    return True


def _process_data(
    data: object,
    dialect: JsonFileDialect | JsonlFileDialect | FileDialect,
) -> list[dict[str, object]]:
    """Apply dialect directives (jsonPointer, rowType, columnNames) to raw rows."""
    if getattr(dialect, "format", None) == "json" and getattr(
        dialect, "jsonPointer", None
    ):
        pointer: str = getattr(dialect, "jsonPointer")
        assert isinstance(data, dict)
        data = data[pointer]

    if getattr(dialect, "rowType", None) == "array":
        # NOTE(review): the first row is always consumed as the header here,
        # even when the dialect says headerRows=False — confirm intended.
        assert isinstance(data, list)
        keys = cast("list[str]", data[0])
        data = [dict(zip(keys, cast("list[object]", row))) for row in data[1:]]

    column_names: list[str] | None = getattr(dialect, "columnNames", None)
    if column_names:
        assert isinstance(data, list)
        data = [
            {name: cast("dict[str, object]", row)[name] for name in column_names}
            for row in data
        ]

    return cast("list[dict[str, object]]", data)
test_should_handle_item_keys(self): + body = '[{"id":1,"name":"english"},{"id":2,"name":"中文"}]' + path = write_temp_file(body) + + table = load_json_table( + Resource(data=path, fileDialect=JsonFileDialect(columnNames=["name"])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"name": "english"}, + {"name": "中文"}, + ] + + def test_should_handle_item_type_array(self): + body = '[["id","name"],[1,"english"],[2,"中文"]]' + path = write_temp_file(body) + + table = load_json_table( + Resource(data=path, fileDialect=JsonFileDialect(rowType=RowType.array)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_item_type_object(self): + body = '[{"id":1,"name":"english"},{"id":2,"name":"中文"}]' + path = write_temp_file(body) + + table = load_json_table( + Resource(data=path, fileDialect=JsonFileDialect(rowType=RowType.object)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + +class TestLoadJsonTableJsonl: + def test_should_load_local_file(self): + body = '{"id":1,"name":"english"}\n{"id":2,"name":"中文"}' + path = write_temp_file(body) + + table = load_json_table(Resource(data=path, fileDialect=JsonlFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_local_file_multipart(self): + body = '{"id":1,"name":"english"}\n{"id":2,"name":"中文"}' + path1 = write_temp_file(body) + path2 = write_temp_file(body) + + table = load_json_table( + Resource(data=[path1, path2], fileDialect=JsonlFileDialect()) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() 
# table/fairspec_table/plugins/json/actions/table/save.py
from __future__ import annotations

from typing import TYPE_CHECKING, Unpack, cast

import polars as pl

from fairspec_dataset import assert_local_path_vacant, save_file
from fairspec_metadata import Resource, TableSchema, get_supported_file_dialect

from fairspec_table.actions.table.denormalize import denormalize_table
from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table
from fairspec_table.plugins.json.actions.buffer.decode import decode_json_buffer
from fairspec_table.plugins.json.actions.buffer.encode import encode_json_buffer
from fairspec_table.plugins.json.settings import NATIVE_TYPES

if TYPE_CHECKING:
    from fairspec_metadata import JsonFileDialect, JsonlFileDialect

    from fairspec_table.models.table import SaveTableOptions, Table


def save_json_table(table: Table, **options: Unpack[SaveTableOptions]) -> str:
    """Write a table to a JSON or JSON Lines file and return the path.

    Raises:
        Exception: if the target path/dialect is not JSON/JSONL, or the
            path is occupied and overwrite was not requested.
    """
    path = options["path"]

    if not options.get("overwrite"):
        assert_local_path_vacant(path)

    resource = Resource(data=path, fileDialect=options.get("fileDialect"))
    file_dialect = get_supported_file_dialect(resource, ["json", "jsonl"])
    if not file_dialect:
        raise Exception("Saving options is not compatible")

    is_lines = getattr(file_dialect, "format", None) == "jsonl"

    table_schema = options.get("tableSchema")
    if not isinstance(table_schema, TableSchema):
        # Merge into one mapping instead of passing keepStrings as an extra
        # keyword: `**options, keepStrings=True` raises TypeError if a caller
        # ever supplies keepStrings in options.
        table_schema = infer_table_schema_from_table(
            table, **{**options, "keepStrings": True}
        )

    table = denormalize_table(table, table_schema, nativeTypes=NATIVE_TYPES)

    frame = cast("pl.DataFrame", table.collect())
    # Round-trip through polars' writer and plain json so dialect directives
    # (columnNames, rowType, jsonPointer) can be applied to python objects.
    text = frame.write_ndjson() if is_lines else frame.write_json()
    data = _process_data(
        decode_json_buffer(text.encode("utf-8"), is_lines=is_lines), file_dialect
    )

    buffer = encode_json_buffer(data, is_lines=is_lines)
    save_file(path, buffer, overwrite=bool(options.get("overwrite")))

    return path


def _process_data(
    records: object,
    dialect: JsonFileDialect | JsonlFileDialect | object,
) -> object:
    """Apply dialect directives to row dicts before serialization."""
    data: object = records

    column_names: list[str] | None = getattr(dialect, "columnNames", None)
    if column_names:
        assert isinstance(data, list)
        data = [
            {name: cast("dict[str, object]", row)[name] for name in column_names}
            for row in data
        ]

    if getattr(dialect, "rowType", None) == "array":
        # NOTE(review): data[0] raises IndexError on an empty table — confirm
        # whether an empty array-row output should instead be produced.
        assert isinstance(data, list)
        header: list[str] = column_names or list(
            cast("dict[str, object]", data[0]).keys()
        )
        data = [
            header,
            *[[cast("dict[str, object]", row)[name] for name in header] for row in data],
        ]

    if getattr(dialect, "format", None) == "json":
        json_pointer: str | None = getattr(dialect, "jsonPointer", None)
        if json_pointer:
            # Nest the rows under the configured top-level key.
            data = {json_pointer: data}

    return data
[{"name": ROW1["name"]}, {"name": ROW2["name"]}], + indent=2, + ensure_ascii=False, + ) + + def test_should_handle_item_type_array(self): + path = get_temp_file_path() + + save_json_table( + TABLE, + path=path, + fileDialect=JsonFileDialect(rowType=RowType.array), + ) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == json.dumps( + [list(ROW1.keys()), list(ROW1.values()), list(ROW2.values())], + indent=2, + ensure_ascii=False, + ) + + def test_should_save_and_load_various_data_types(self): + from fairspec_metadata import Resource + + path = get_temp_file_path(format="json") + + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", ["2025-01-01"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], dtype=pl.List(pl.Float32)), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_json_table(source, path=path) + + target = load_json_table(Resource(data=path), denormalized=True) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": True, + "date": "2025-01-01", + "integer": 1, + "list": [1.0, 2.0, 3.0], + "number": 1.1, + "string": "string", + }, + ] + + +class TestSaveJsonTableJsonl: + def test_should_save_table_to_file(self): + path = get_temp_file_path() + + save_json_table(TABLE, path=path, fileDialect=JsonlFileDialect()) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "\n".join( + [ + json.dumps(ROW1, ensure_ascii=False), + json.dumps(ROW2, ensure_ascii=False), + ] + ) + + def test_should_handle_item_keys(self): + path = get_temp_file_path() + + save_json_table( + TABLE, + path=path, + fileDialect=JsonlFileDialect(columnNames=["name"]), + ) + + with open(path, encoding="utf-8") as f: + content = f.read() + assert content == "\n".join( + [ + json.dumps({"name": 
class JsonPlugin(TablePlugin):
    """Table plugin handling the json and jsonl file formats."""

    def load_table(
        self,
        resource: Resource,
        **options: Unpack[LoadTableOptions],
    ) -> Table | None:
        """Load a table from a json/jsonl resource, or None if unsupported."""
        if not get_supported_file_dialect(resource, ["json", "jsonl"]):
            return None

        # NOTE(review): an inferred dialect replaces whatever dialect the
        # resource already carries — confirm infer_json_file_dialect respects
        # explicitly provided settings.
        inferred = infer_json_file_dialect(resource)
        if inferred:
            resource = resource.model_copy(update={"fileDialect": inferred})

        return load_json_table(resource, **options)

    def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None:
        """Save a table to a json/jsonl path, or None if unsupported."""
        dialect = cast(FileDialect | None, options.get("fileDialect"))
        probe = Resource(data=options["path"], fileDialect=dialect)
        if not get_supported_file_dialect(probe, ["json", "jsonl"]):
            return None

        return save_json_table(table, **options)
+ assert result is mock_table + + @patch("fairspec_table.plugins.json.plugin.infer_json_file_dialect") + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_load_table_from_ndjson_file( + self, mock_load: MagicMock, mock_infer: MagicMock + ): + resource = Resource(data="test.ndjson") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + mock_infer.return_value = JsonlFileDialect() + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_return_none_for_non_json_files(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.json.plugin.infer_json_file_dialect") + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_handle_explicit_json_format( + self, mock_load: MagicMock, mock_infer: MagicMock + ): + resource = Resource(data="test.txt", fileDialect=JsonFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + mock_infer.return_value = JsonFileDialect() + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.json.plugin.infer_json_file_dialect") + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_pass_through_load_options( + self, mock_load: MagicMock, mock_infer: MagicMock + ): + resource = Resource(data="test.json") + mock_load.return_value = pl.DataFrame().lazy() + mock_infer.return_value = JsonFileDialect() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.json.plugin.infer_json_file_dialect") + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_handle_paths_with_directories( + self, mock_load: MagicMock, mock_infer: 
MagicMock + ): + resource = Resource(data="/path/to/data.json") + mock_load.return_value = pl.DataFrame().lazy() + mock_infer.return_value = JsonFileDialect() + + self.plugin.load_table(resource) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.json.plugin.load_json_table") + def test_should_return_none_for_parquet_files(self, mock_load: MagicMock): + resource = Resource(data="test.parquet") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + +class TestJsonPluginSaveTable: + def setup_method(self): + self.plugin = JsonPlugin() + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_save_table_to_json_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.json" + + result = self.plugin.save_table(table, path="output.json") + + mock_save.assert_called_once_with(table, path="output.json") + assert result == "output.json" + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_save_table_to_jsonl_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.jsonl" + + result = self.plugin.save_table(table, path="output.jsonl") + + mock_save.assert_called_once_with(table, path="output.jsonl") + assert result == "output.jsonl" + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_save_table_to_ndjson_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.ndjson" + + result = self.plugin.save_table(table, path="output.ndjson") + + mock_save.assert_called_once_with(table, path="output.ndjson") + assert result == "output.ndjson" + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_return_none_for_non_json_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.csv") + + 
mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_handle_explicit_json_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", fileDialect=JsonFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=JsonFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_handle_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.json" + + self.plugin.save_table(table, path="/path/to/output.json") + + mock_save.assert_called_once_with(table, path="/path/to/output.json") + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.json.plugin.save_json_table") + def test_should_return_none_for_parquet_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.parquet") + + mock_save.assert_not_called() + assert result is None diff --git a/table/fairspec_table/plugins/json/settings.py b/table/fairspec_table/plugins/json/settings.py new file mode 100644 index 0000000..05640fa --- /dev/null +++ b/table/fairspec_table/plugins/json/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["boolean", "integer", "list", "number", "string"] diff --git a/table/fairspec_table/plugins/parquet/__init__.py b/table/fairspec_table/plugins/parquet/__init__.py new file mode 100644 index 0000000..3b24037 --- /dev/null +++ b/table/fairspec_table/plugins/parquet/__init__.py @@ -0,0 +1,9 @@ +from 
def load_parquet_table(
    resource: Resource, **options: Unpack[LoadTableOptions]
) -> Table:
    """Lazily load a parquet resource (single file or multipart) as a table.

    Raises if the resource has no data paths. Unless ``denormalized`` is set,
    the result is normalized against the resolved (or inferred) table schema.
    """
    paths = prefetch_files(resource)
    if not paths:
        raise Exception("Resource data is not defined")

    # Multipart resources are concatenated in path order.
    scans = [pl.scan_parquet(path) for path in paths]
    table = scans[0] if len(scans) == 1 else pl.concat(scans)

    if options.get("denormalized"):
        return table

    table_schema = resolve_table_schema(resource.tableSchema)
    if not table_schema:
        table_schema = infer_table_schema_from_table(table, **options)
    return normalize_table(table, table_schema)
a/table/fairspec_table/plugins/parquet/actions/table/load_spec.py b/table/fairspec_table/plugins/parquet/actions/table/load_spec.py new file mode 100644 index 0000000..66d4c8a --- /dev/null +++ b/table/fairspec_table/plugins/parquet/actions/table/load_spec.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import ParquetFileDialect, Resource + +from .load import load_parquet_table + + +class TestLoadParquetTable: + def test_should_load_local_file(self): + path = get_temp_file_path() + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_parquet(path) + + table = load_parquet_table( + Resource(data=path, fileDialect=ParquetFileDialect()) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_load_local_file_multipart(self): + path1 = get_temp_file_path() + path2 = get_temp_file_path() + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_parquet(path1) + pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).write_parquet(path2) + + table = load_parquet_table(Resource(data=[path1, path2])) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] diff --git a/table/fairspec_table/plugins/parquet/actions/table/save.py b/table/fairspec_table/plugins/parquet/actions/table/save.py new file mode 100644 index 0000000..b2806fd --- /dev/null +++ b/table/fairspec_table/plugins/parquet/actions/table/save.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +from fairspec_dataset import assert_local_path_vacant +from fairspec_metadata import TableSchema + +from fairspec_table.actions.table.denormalize import 
def save_parquet_table(table: Table, **options: Unpack[SaveTableOptions]) -> str:
    """Write a table to a parquet file and return the output path.

    Unless ``overwrite`` is set, raises (via assert_local_path_vacant) when
    the target path is already occupied. The table is denormalized against
    the provided or inferred schema before sinking.
    """
    path = options["path"]

    if not options.get("overwrite"):
        assert_local_path_vacant(path)

    schema = options.get("tableSchema")
    if not isinstance(schema, TableSchema):
        schema = infer_table_schema_from_table(table, **options, keepStrings=True)

    denormalized = denormalize_table(table, schema, nativeTypes=NATIVE_TYPES)
    denormalized.sink_parquet(path)

    return path
class ParquetPlugin(TablePlugin):
    """Table plugin handling the parquet file format."""

    def load_table(
        self,
        resource: Resource,
        **options: Unpack[LoadTableOptions],
    ) -> Table | None:
        """Load a table from a parquet resource, or None if unsupported."""
        if not get_supported_file_dialect(resource, ["parquet"]):
            return None
        return load_parquet_table(resource, **options)

    def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None:
        """Save a table to a parquet path, or None if unsupported."""
        dialect = cast(FileDialect | None, options.get("fileDialect"))
        probe = Resource(data=options["path"], fileDialect=dialect)
        if not get_supported_file_dialect(probe, ["parquet"]):
            return None
        return save_parquet_table(table, **options)
pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.parquet.plugin.load_parquet_table") + def test_should_handle_paths_with_directories(self, mock_load: MagicMock): + resource = Resource(data="/path/to/data.parquet") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once_with(resource) + + @patch("fairspec_table.plugins.parquet.plugin.load_parquet_table") + def test_should_return_none_for_arrow_files(self, mock_load: MagicMock): + resource = Resource(data="test.arrow") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.parquet.plugin.load_parquet_table") + def test_should_return_none_for_json_files(self, mock_load: MagicMock): + resource = Resource(data="test.json") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + +class TestParquetPluginSaveTable: + def setup_method(self): + self.plugin = ParquetPlugin() + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_save_table_to_parquet_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.parquet" + + result = self.plugin.save_table(table, path="output.parquet") + + mock_save.assert_called_once_with(table, path="output.parquet") + assert result == "output.parquet" + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_return_none_for_non_parquet_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.csv") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_handle_explicit_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + 
mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", fileDialect=ParquetFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=ParquetFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_handle_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.parquet" + + self.plugin.save_table(table, path="/path/to/output.parquet") + + mock_save.assert_called_once_with(table, path="/path/to/output.parquet") + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_return_none_for_arrow_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.arrow") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.parquet.plugin.save_parquet_table") + def test_should_return_none_for_json_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.json") + + mock_save.assert_not_called() + assert result is None diff --git a/table/fairspec_table/plugins/parquet/settings.py b/table/fairspec_table/plugins/parquet/settings.py new file mode 100644 index 0000000..50814bc --- /dev/null +++ b/table/fairspec_table/plugins/parquet/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["boolean", "integer", "number", "string", "list", "date-time"] diff --git a/table/fairspec_table/plugins/sqlite/__init__.py b/table/fairspec_table/plugins/sqlite/__init__.py new file mode 100644 
index 0000000..f93034e --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/__init__.py @@ -0,0 +1,5 @@ +from .actions.table.load import load_sqlite_table +from .actions.table.save import save_sqlite_table +from .plugin import SqlitePlugin + +__all__ = ["SqlitePlugin", "load_sqlite_table", "save_sqlite_table"] diff --git a/table/fairspec_table/plugins/sqlite/actions/__init__.py b/table/fairspec_table/plugins/sqlite/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/actions/column/__init__.py b/table/fairspec_table/plugins/sqlite/actions/column/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/actions/column/from_database.py b/table/fairspec_table/plugins/sqlite/actions/column/from_database.py new file mode 100644 index 0000000..bd6c4e1 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/column/from_database.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import create_column_from_property + +from fairspec_table.plugins.sqlite.models.column import SqliteColumn + +if TYPE_CHECKING: + from fairspec_metadata import Column + + +def convert_column_from_database(database_column: SqliteColumn) -> Column: + property = _convert_property(database_column.dataType) + column = create_column_from_property(database_column.name, property) + + if database_column.comment: + column.property.description = database_column.comment + + return column + + +def _convert_property(database_type: str) -> dict[str, str]: + match database_type.lower(): + case "blob": + return {"type": "string"} + case "text": + return {"type": "string"} + case "integer": + return {"type": "integer"} + case "numeric" | "real": + return {"type": "number"} + case _: + return {} diff --git a/table/fairspec_table/plugins/sqlite/actions/column/to_database.py 
b/table/fairspec_table/plugins/sqlite/actions/column/to_database.py new file mode 100644 index 0000000..c5bce80 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/column/to_database.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_table.plugins.sqlite.models.column import SqliteColumn + +if TYPE_CHECKING: + from fairspec_metadata import Column + + +def convert_column_to_database( + column: Column, is_nullable: bool = True +) -> SqliteColumn: + return SqliteColumn( + name=column.name, + dataType=_convert_type(column.type), + isNullable=is_nullable, + comment=column.property.description, + isAutoIncrementing=False, + hasDefaultValue=False, + ) + + +def _convert_type(column_type: str) -> str: + match column_type: + case "boolean": + return "integer" + case "integer": + return "integer" + case "number": + return "real" + case "string": + return "text" + case _: + return "text" diff --git a/table/fairspec_table/plugins/sqlite/actions/database/__init__.py b/table/fairspec_table/plugins/sqlite/actions/database/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/actions/database/connect.py b/table/fairspec_table/plugins/sqlite/actions/database/connect.py new file mode 100644 index 0000000..8869ccc --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/database/connect.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import sqlite3 + +from fairspec_dataset import get_is_local_path_exist + + +def connect_database(path: str, *, create: bool = False) -> sqlite3.Connection: + path = path.removeprefix("sqlite://") + + if path == ":memory:" or path.startswith("file::memory"): + raise Exception("In-memory databases are not supported") + + if not create: + if not get_is_local_path_exist(path): + raise Exception(f'Database file "{path}" does not exist') + + conn = sqlite3.connect(path) + conn.row_factory = sqlite3.Row + return conn diff 
--git a/table/fairspec_table/plugins/sqlite/actions/table/__init__.py b/table/fairspec_table/plugins/sqlite/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/actions/table/load.py b/table/fairspec_table/plugins/sqlite/actions/table/load.py new file mode 100644 index 0000000..7e36d44 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table/load.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +import polars as pl + +from fairspec_metadata import ( + Resource, + TableSchema, + get_data_first_path, + get_supported_file_dialect, + resolve_table_schema, +) +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from fairspec_table.actions.table.normalize import normalize_table +from fairspec_table.plugins.sqlite.actions.database.connect import connect_database +from fairspec_table.plugins.sqlite.actions.table_schema.infer import ( + infer_table_schema_from_sqlite, +) + +if TYPE_CHECKING: + from fairspec_table.models.table import LoadTableOptions, Table + + +def load_sqlite_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table: + first_path = get_data_first_path(resource) + if not first_path: + raise Exception("Resource path is not defined") + + file_dialect = get_supported_file_dialect(resource, ["sqlite"]) + if not file_dialect: + raise Exception("Resource data is not compatible") + + conn = connect_database(first_path) + try: + cursor = conn.cursor() + tables = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + ).fetchall() + + table_name = getattr(file_dialect, "tableName", None) or ( + tables[0]["name"] if tables else None + ) + + if not table_name: + raise Exception("Table name is not defined") + + rows = cursor.execute(f'SELECT * FROM "{table_name}"').fetchall() + records = [dict(row) for row in rows] + table: Table = pl.DataFrame(records).lazy() 
+ + if not options.get("denormalized"): + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + descriptor = infer_table_schema_from_sqlite( + Resource( + data=first_path, fileDialect=cast(FileDialect, file_dialect) + ) + ) + table_schema = TableSchema.model_validate(descriptor) + table = normalize_table(table, table_schema) + + return table + finally: + conn.close() diff --git a/table/fairspec_table/plugins/sqlite/actions/table/load_spec.py b/table/fairspec_table/plugins/sqlite/actions/table/load_spec.py new file mode 100644 index 0000000..d2a0806 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table/load_spec.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from fairspec_metadata import Resource, SqliteFileDialect + +import pytest + +from .load import load_sqlite_table + + +class TestLoadSqliteTable: + def test_should_raise_error_when_resource_path_is_not_defined(self): + with pytest.raises(Exception, match="Resource path is not defined"): + load_sqlite_table( + Resource(fileDialect=SqliteFileDialect(tableName="fairspec")) + ) diff --git a/table/fairspec_table/plugins/sqlite/actions/table/save.py b/table/fairspec_table/plugins/sqlite/actions/table/save.py new file mode 100644 index 0000000..2e6e714 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table/save.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import sqlite3 +from typing import TYPE_CHECKING, Unpack, cast + +import polars as pl + +from fairspec_metadata import Resource, TableSchema, get_supported_file_dialect + +from fairspec_table.actions.table.denormalize import denormalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.plugins.sqlite.actions.database.connect import connect_database +from fairspec_table.plugins.sqlite.actions.table_schema.to_database import ( + convert_table_schema_to_database, +) +from fairspec_table.plugins.sqlite.models.schema import 
SqliteSchema +from fairspec_table.plugins.sqlite.settings import NATIVE_TYPES + +if TYPE_CHECKING: + from fairspec_table.models.table import SaveTableOptions, Table + +BUFFER_SIZE = 10_000 + + +def save_sqlite_table(table: Table, **options: Unpack[SaveTableOptions]) -> str: + path = options["path"] + + resource = Resource(data=path, fileDialect=options.get("fileDialect")) + file_dialect = get_supported_file_dialect(resource, ["sqlite"]) + if not file_dialect: + raise Exception("Saving options is not compatible") + + table_schema = options.get("tableSchema") + if not isinstance(table_schema, TableSchema): + table_schema = infer_table_schema_from_table( + table, **options, keepStrings=True + ) + + table = denormalize_table(table, table_schema, nativeTypes=NATIVE_TYPES) + + conn = connect_database(path, create=True) + try: + cursor = conn.cursor() + tables = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + ).fetchall() + + table_name = getattr(file_dialect, "tableName", None) or ( + tables[0]["name"] if tables else None + ) + + if not table_name: + raise Exception("Table name is not defined") + + sqlite_schema = convert_table_schema_to_database(table_schema, table_name) + + _define_table(conn, sqlite_schema, overwrite=bool(options.get("overwrite"))) + _populate_table(conn, table_name, table) + + return path + finally: + conn.close() + + +def _define_table( + conn: sqlite3.Connection, schema: SqliteSchema, *, overwrite: bool +) -> None: + if overwrite: + conn.execute(f'DROP TABLE IF EXISTS "{schema.name}"') + + cols: list[str] = [] + for col in schema.columns: + null = "" if col.isNullable else " NOT NULL" + cols.append(f'"{col.name}" {col.dataType}{null}') + + if schema.primaryKey: + pk_cols = ", ".join(f'"{c}"' for c in schema.primaryKey) + cols.append(f"PRIMARY KEY ({pk_cols})") + + sql = f'CREATE TABLE "{schema.name}" ({", ".join(cols)})' + conn.execute(sql) + + +def _populate_table(conn: sqlite3.Connection, table_name: str, 
table: Table) -> None: + frame = cast("pl.DataFrame", table.collect()) + records = frame.to_dicts() + if not records: + return + + columns = list(records[0].keys()) + placeholders = ", ".join("?" * len(columns)) + col_names = ", ".join(f'"{c}"' for c in columns) + sql = f'INSERT INTO "{table_name}" ({col_names}) VALUES ({placeholders})' + + for i in range(0, len(records), BUFFER_SIZE): + batch = records[i : i + BUFFER_SIZE] + conn.executemany(sql, [tuple(r[c] for c in columns) for r in batch]) + + conn.commit() diff --git a/table/fairspec_table/plugins/sqlite/actions/table/save_spec.py b/table/fairspec_table/plugins/sqlite/actions/table/save_spec.py new file mode 100644 index 0000000..b52ffd3 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table/save_spec.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import Resource, SqliteFileDialect + +from .load import load_sqlite_table +from .save import save_sqlite_table + +DIALECT = SqliteFileDialect(tableName="fairspec") + + +class TestSaveSqliteTable: + def test_should_save_and_load_table(self): + path = get_temp_file_path() + + source = pl.DataFrame( + [{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}] + ).lazy() + save_sqlite_table( + source, path=path, fileDialect=DIALECT, overwrite=True + ) + + target = load_sqlite_table(Resource(data=path, fileDialect=DIALECT)) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_save_and_load_table_with_protocol(self): + path = f"sqlite://{get_temp_file_path()}" + + source = pl.DataFrame( + [{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}] + ).lazy() + save_sqlite_table( + source, path=path, fileDialect=DIALECT, overwrite=True + ) + + target = load_sqlite_table(Resource(data=path, fileDialect=DIALECT)) + frame: 
pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"id": 1, "name": "english"}, + {"id": 2, "name": "中文"}, + ] + + def test_should_save_and_load_various_data_types(self): + path = f"sqlite://{get_temp_file_path()}" + + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", ["2025-01-01"], dtype=pl.String), + pl.Series("datetime", ["2025-01-01T00:00:00"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_sqlite_table( + source, path=path, fileDialect=DIALECT, overwrite=True + ) + + target = load_sqlite_table( + Resource(data=path, fileDialect=DIALECT), + denormalized=True, + ) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": "true", + "date": "2025-01-01", + "datetime": "2025-01-01T00:00:00", + "integer": 1, + "number": 1.1, + "string": "string", + }, + ] diff --git a/table/fairspec_table/plugins/sqlite/actions/table_schema/__init__.py b/table/fairspec_table/plugins/sqlite/actions/table_schema/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/actions/table_schema/from_database.py b/table/fairspec_table/plugins/sqlite/actions/table_schema/from_database.py new file mode 100644 index 0000000..5747a4d --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table_schema/from_database.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from fairspec_metadata import Column, get_column_properties + +from fairspec_table.plugins.sqlite.actions.column.from_database import ( + convert_column_from_database, +) +from fairspec_table.plugins.sqlite.models.schema import SqliteSchema + + +def convert_table_schema_from_database(database_schema: SqliteSchema) -> dict: + columns: list[Column] = [] + required: list[str] 
= [] + + for database_column in database_schema.columns: + column = convert_column_from_database(database_column) + + if database_column.isNullable: + base_type = column.property.type + if base_type and isinstance(base_type, str): + column.property.type = [base_type, "null"] # type: ignore[assignment] + + columns.append(column) + + if not database_column.isNullable: + required.append(database_column.name) + + return { + "properties": get_column_properties(columns), + "primaryKey": database_schema.primaryKey, + "required": required, + } diff --git a/table/fairspec_table/plugins/sqlite/actions/table_schema/infer.py b/table/fairspec_table/plugins/sqlite/actions/table_schema/infer.py new file mode 100644 index 0000000..a4dcba7 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table_schema/infer.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from fairspec_metadata import Resource, get_data_first_path, get_supported_file_dialect + +from fairspec_table.plugins.sqlite.actions.database.connect import connect_database +from fairspec_table.plugins.sqlite.actions.table_schema.from_database import ( + convert_table_schema_from_database, +) +from fairspec_table.plugins.sqlite.models.column import SqliteColumn +from fairspec_table.plugins.sqlite.models.schema import SqliteSchema + + +def infer_table_schema_from_sqlite(resource: Resource) -> dict: + first_path = get_data_first_path(resource) + if not first_path: + raise Exception("Database is not defined") + + dialect = get_supported_file_dialect(resource, ["sqlite"]) + if not dialect: + raise Exception("Resource data is not compatible") + + conn = connect_database(first_path) + try: + cursor = conn.cursor() + tables = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + ).fetchall() + + table_name = getattr(dialect, "tableName", None) or ( + tables[0]["name"] if tables else None + ) + + if not table_name: + raise Exception("Table name is not defined") + + pragma_rows = 
cursor.execute(f'PRAGMA table_info("{table_name}")').fetchall() + + columns: list[SqliteColumn] = [] + pk_columns: list[str] = [] + for row in pragma_rows: + columns.append( + SqliteColumn( + name=row["name"], + dataType=row["type"].lower() if row["type"] else "text", + isNullable=not bool(row["notnull"]), + hasDefaultValue=row["dflt_value"] is not None, + ) + ) + if row["pk"]: + pk_columns.append(row["name"]) + + schema = SqliteSchema( + name=table_name, + columns=columns, + primaryKey=pk_columns or None, + ) + + return convert_table_schema_from_database(schema) + finally: + conn.close() diff --git a/table/fairspec_table/plugins/sqlite/actions/table_schema/infer_spec.py b/table/fairspec_table/plugins/sqlite/actions/table_schema/infer_spec.py new file mode 100644 index 0000000..6197566 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table_schema/infer_spec.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import polars as pl +import pytest +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import Resource, SqliteFileDialect + +from fairspec_table.plugins.sqlite.actions.table.save import save_sqlite_table + +from .infer import infer_table_schema_from_sqlite + +DIALECT = SqliteFileDialect(tableName="fairspec") + + +class TestInferTableSchemaFromSqlite: + def test_should_infer_schema(self): + path = get_temp_file_path() + + source = pl.DataFrame( + [ + pl.Series("string", ["string"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + ] + ).lazy() + + save_sqlite_table( + source, path=path, fileDialect=DIALECT, overwrite=True + ) + + schema = infer_table_schema_from_sqlite( + Resource(data=path, fileDialect=DIALECT) + ) + properties = { + name: prop.model_dump(exclude_none=True) + for name, prop in schema["properties"].items() + } + + assert schema["required"] == [] + assert schema["primaryKey"] is None + assert properties == { + "string": {"type": ["string", 
"null"]}, + "integer": {"type": ["integer", "null"]}, + "number": {"type": ["number", "null"]}, + } + + def test_should_raise_error_when_resource_path_is_not_defined(self): + with pytest.raises(Exception, match="Database is not defined"): + infer_table_schema_from_sqlite( + Resource(fileDialect=SqliteFileDialect(tableName="fairspec")) + ) diff --git a/table/fairspec_table/plugins/sqlite/actions/table_schema/to_database.py b/table/fairspec_table/plugins/sqlite/actions/table_schema/to_database.py new file mode 100644 index 0000000..335ba25 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/actions/table_schema/to_database.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from fairspec_metadata import TableSchema, get_columns + +from fairspec_table.plugins.sqlite.actions.column.to_database import ( + convert_column_to_database, +) +from fairspec_table.plugins.sqlite.models.schema import SqliteSchema + + +def convert_table_schema_to_database( + table_schema: TableSchema, table_name: str +) -> SqliteSchema: + schema = SqliteSchema(name=table_name, columns=[], isView=False) + + columns = get_columns(table_schema.model_dump()) + for column in columns: + is_nullable = ( + column.nullable if column.nullable is not None else not column.required + ) + database_column = convert_column_to_database(column, is_nullable) + schema.columns.append(database_column) + + if table_schema.primaryKey: + schema.primaryKey = table_schema.primaryKey + + return schema diff --git a/table/fairspec_table/plugins/sqlite/models/__init__.py b/table/fairspec_table/plugins/sqlite/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/sqlite/models/column.py b/table/fairspec_table/plugins/sqlite/models/column.py new file mode 100644 index 0000000..af4870a --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/models/column.py @@ -0,0 +1,10 @@ +from fairspec_metadata.models.base import FairspecModel + + +class SqliteColumn(FairspecModel): 
+ name: str + dataType: str + isNullable: bool = True + comment: str | None = None + isAutoIncrementing: bool = False + hasDefaultValue: bool = False diff --git a/table/fairspec_table/plugins/sqlite/models/schema.py b/table/fairspec_table/plugins/sqlite/models/schema.py new file mode 100644 index 0000000..43951f0 --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/models/schema.py @@ -0,0 +1,10 @@ +from fairspec_metadata.models.base import FairspecModel + +from .column import SqliteColumn + + +class SqliteSchema(FairspecModel): + name: str + columns: list[SqliteColumn] + isView: bool = False + primaryKey: list[str] | None = None diff --git a/table/fairspec_table/plugins/sqlite/plugin.py b/table/fairspec_table/plugins/sqlite/plugin.py new file mode 100644 index 0000000..424154f --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/plugin.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +from fairspec_metadata import Resource, SqliteFileDialect, get_supported_file_dialect +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from fairspec_table.plugin import TablePlugin + +from .actions.table.load import load_sqlite_table +from .actions.table.save import save_sqlite_table + +if TYPE_CHECKING: + from fairspec_dataset.models.file_dialect import InferFileDialectOptions + + from fairspec_table.models.table import LoadTableOptions, SaveTableOptions, Table + + +class SqlitePlugin(TablePlugin): + def load_table( + self, + resource: Resource, + **options: Unpack[LoadTableOptions], + ) -> Table | None: + file_dialect = get_supported_file_dialect(resource, ["sqlite"]) + if not file_dialect: + return None + return load_sqlite_table(resource, **options) + + def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + resource = Resource( + data=options["path"], fileDialect=cast(FileDialect | None, options.get("fileDialect")) + ) + file_dialect = 
get_supported_file_dialect(resource, ["sqlite"]) + if not file_dialect: + return None + return save_sqlite_table(table, **options) + + def infer_file_dialect( + self, + resource: Resource, + **options: Unpack[InferFileDialectOptions], + ) -> FileDialect | None: + file_dialect = get_supported_file_dialect(resource, ["sqlite"]) + if not file_dialect: + return None + return SqliteFileDialect() diff --git a/table/fairspec_table/plugins/sqlite/plugin_spec.py b/table/fairspec_table/plugins/sqlite/plugin_spec.py new file mode 100644 index 0000000..167f10d --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/plugin_spec.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import polars as pl +from fairspec_metadata import Resource, SqliteFileDialect + +from .plugin import SqlitePlugin + + +class TestSqlitePluginLoadTable: + def setup_method(self): + self.plugin = SqlitePlugin() + + @patch("fairspec_table.plugins.sqlite.plugin.load_sqlite_table") + def test_should_load_table_from_sqlite_file(self, mock_load: MagicMock): + resource = Resource(data="test.sqlite") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.sqlite.plugin.load_sqlite_table") + def test_should_return_none_for_non_sqlite_files(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.sqlite.plugin.load_sqlite_table") + def test_should_handle_explicit_sqlite_format(self, mock_load: MagicMock): + resource = Resource(data="test.db", fileDialect=SqliteFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + 
@patch("fairspec_table.plugins.sqlite.plugin.load_sqlite_table") + def test_should_pass_through_load_options(self, mock_load: MagicMock): + resource = Resource(data="test.sqlite") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.sqlite.plugin.load_sqlite_table") + def test_should_handle_paths_with_directories(self, mock_load: MagicMock): + resource = Resource(data="/path/to/data.sqlite") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once() + + +class TestSqlitePluginSaveTable: + def setup_method(self): + self.plugin = SqlitePlugin() + + @patch("fairspec_table.plugins.sqlite.plugin.save_sqlite_table") + def test_should_save_table_to_sqlite_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.sqlite" + + result = self.plugin.save_table(table, path="output.sqlite") + + mock_save.assert_called_once_with(table, path="output.sqlite") + assert result == "output.sqlite" + + @patch("fairspec_table.plugins.sqlite.plugin.save_sqlite_table") + def test_should_return_none_for_non_sqlite_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.csv") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.sqlite.plugin.save_sqlite_table") + def test_should_handle_explicit_sqlite_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.db" + + result = self.plugin.save_table(table, path="output.db", fileDialect=SqliteFileDialect()) + + mock_save.assert_called_once_with(table, path="output.db", fileDialect=SqliteFileDialect()) + assert result == "output.db" + + @patch("fairspec_table.plugins.sqlite.plugin.save_sqlite_table") + def test_should_handle_paths_with_directories(self, mock_save: 
MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.sqlite" + + self.plugin.save_table(table, path="/path/to/output.sqlite") + + mock_save.assert_called_once_with(table, path="/path/to/output.sqlite") + + @patch("fairspec_table.plugins.sqlite.plugin.save_sqlite_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None diff --git a/table/fairspec_table/plugins/sqlite/settings.py b/table/fairspec_table/plugins/sqlite/settings.py new file mode 100644 index 0000000..60505fd --- /dev/null +++ b/table/fairspec_table/plugins/sqlite/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["integer", "number", "string"] diff --git a/table/fairspec_table/plugins/xlsx/__init__.py b/table/fairspec_table/plugins/xlsx/__init__.py new file mode 100644 index 0000000..a33ad2e --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/__init__.py @@ -0,0 +1,11 @@ +from .actions.file_dialect.infer import infer_xlsx_file_dialect +from .actions.table.load import load_xlsx_table +from .actions.table.save import save_xlsx_table +from .plugin import XlsxPlugin + +__all__ = [ + "XlsxPlugin", + "infer_xlsx_file_dialect", + "load_xlsx_table", + "save_xlsx_table", +] diff --git a/table/fairspec_table/plugins/xlsx/actions/__init__.py b/table/fairspec_table/plugins/xlsx/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/xlsx/actions/buffer/__init__.py b/table/fairspec_table/plugins/xlsx/actions/buffer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/xlsx/actions/buffer/decode.py b/table/fairspec_table/plugins/xlsx/actions/buffer/decode.py new file mode 100644 index 0000000..53ffeee --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/buffer/decode.py @@ -0,0 +1,135 @@ +from __future__ 
import annotations + +from io import BytesIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fairspec_table.models.data import DataRow + +OFFICENS = "urn:oasis:names:tc:opendocument:xmlns:office:1.0" +TABLENS = "urn:oasis:names:tc:opendocument:xmlns:table:1.0" + + +def decode_xlsx_buffer( + data: bytes, + *, + format: str = "xlsx", + sheet_name: str | None = None, + sheet_number: int | None = None, +) -> list[DataRow]: + if format == "ods": + return _decode_ods(data, sheet_name=sheet_name, sheet_number=sheet_number) + return _decode_xlsx(data, sheet_name=sheet_name, sheet_number=sheet_number) + + +def _decode_xlsx( + data: bytes, + *, + sheet_name: str | None = None, + sheet_number: int | None = None, +) -> list[DataRow]: + import openpyxl + + wb = openpyxl.load_workbook(BytesIO(data), data_only=True) + ws = _select_openpyxl_sheet(wb, sheet_name, sheet_number) + if ws is None: + return [] + + rows: list[DataRow] = [] + for row_tuple in ws.iter_rows(values_only=True): # ty: ignore[unresolved-attribute] + row = list(row_tuple) + while row and row[-1] is None: + row.pop() + rows.append(row) + return rows + + +def _select_openpyxl_sheet( + wb: object, + sheet_name: str | None, + sheet_number: int | None, +) -> object | None: + if sheet_name is not None: + return wb[sheet_name] if sheet_name in wb.sheetnames else None # ty: ignore[unresolved-attribute, not-subscriptable] + index = (sheet_number - 1) if sheet_number else 0 + if 0 <= index < len(wb.sheetnames): # ty: ignore[unresolved-attribute] + return wb[wb.sheetnames[index]] # ty: ignore[unresolved-attribute, not-subscriptable] + return None + + +def _decode_ods( + data: bytes, + *, + sheet_name: str | None = None, + sheet_number: int | None = None, +) -> list[DataRow]: + from odf.opendocument import load as odf_load + from odf.table import Table as OdfTable + from odf.table import TableCell, TableRow + + doc = odf_load(BytesIO(data)) + sheets = doc.spreadsheet.getElementsByType(OdfTable) + + sheet = 
_select_ods_sheet(sheets, sheet_name, sheet_number) + if sheet is None: + return [] + + rows: list[DataRow] = [] + for table_row in sheet.getElementsByType(TableRow): # ty: ignore[unresolved-attribute] + row: DataRow = [] + for cell in table_row.getElementsByType(TableCell): + repeat_str = cell.getAttrNS(TABLENS, "number-columns-repeated") + repeat = int(repeat_str) if repeat_str else 1 + value = _get_ods_cell_value(cell) + row.extend([value] * min(repeat, 10000)) + + while row and row[-1] is None: + row.pop() + + rows.append(row) + + while rows and not rows[-1]: + rows.pop() + + return rows + + +def _select_ods_sheet( + sheets: list[object], + sheet_name: str | None, + sheet_number: int | None, +) -> object | None: + if sheet_name is not None: + for sheet in sheets: + if sheet.getAttribute("name") == sheet_name: # ty: ignore[unresolved-attribute] + return sheet + return None + index = (sheet_number - 1) if sheet_number else 0 + if 0 <= index < len(sheets): + return sheets[index] + return None + + +def _get_ods_cell_value(cell: object) -> object: + from odf.text import P + + value_type = cell.getAttrNS(OFFICENS, "value-type") # ty: ignore[unresolved-attribute] + if value_type == "float": + val = cell.getAttrNS(OFFICENS, "value") # ty: ignore[unresolved-attribute] + float_val = float(val) + if float_val == int(float_val): + return int(float_val) + return float_val + if value_type == "boolean": + return cell.getAttrNS(OFFICENS, "boolean-value") == "true" # ty: ignore[unresolved-attribute] + if value_type == "string": + ps = cell.getElementsByType(P) # ty: ignore[unresolved-attribute] + if ps: + return str(ps[0]) + return "" + if value_type is None: + return None + ps = cell.getElementsByType(P) # ty: ignore[unresolved-attribute] + if ps: + return str(ps[0]) + return None diff --git a/table/fairspec_table/plugins/xlsx/actions/buffer/encode.py b/table/fairspec_table/plugins/xlsx/actions/buffer/encode.py new file mode 100644 index 0000000..ef3d6f9 --- /dev/null +++ 
b/table/fairspec_table/plugins/xlsx/actions/buffer/encode.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from io import BytesIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fairspec_table.models.data import DataRow + + +def encode_xlsx_buffer( + rows: list[DataRow], + *, + sheet_name: str = "Sheet1", + book_type: str = "xlsx", +) -> bytes: + if book_type == "ods": + return _encode_ods(rows, sheet_name=sheet_name) + return _encode_xlsx(rows, sheet_name=sheet_name) + + +def _encode_xlsx(rows: list[DataRow], *, sheet_name: str) -> bytes: + import openpyxl + + wb = openpyxl.Workbook() + ws = wb.active + ws.title = sheet_name + for row in rows: + ws.append(row) + buf = BytesIO() + wb.save(buf) + return buf.getvalue() + + +def _encode_ods(rows: list[DataRow], *, sheet_name: str) -> bytes: + from odf.opendocument import OpenDocumentSpreadsheet + from odf.table import Table, TableRow + + doc = OpenDocumentSpreadsheet() + table = Table(name=sheet_name) + + for row_data in rows: + tr = TableRow() + for value in row_data: + tc = _create_ods_cell(value) + tr.addElement(tc) + table.addElement(tr) + + doc.spreadsheet.addElement(table) + buf = BytesIO() + doc.save(buf) + return buf.getvalue() + + +def _create_ods_cell(value: object) -> object: + from odf.table import TableCell + from odf.text import P + + if value is None: + return TableCell() + if isinstance(value, bool): + tc = TableCell(valuetype="boolean", booleanvalue=str(value).lower()) + tc.addElement(P(text=str(value).upper())) + return tc + if isinstance(value, int): + tc = TableCell(valuetype="float", value=str(value)) + tc.addElement(P(text=str(value))) + return tc + if isinstance(value, float): + tc = TableCell(valuetype="float", value=str(value)) + tc.addElement(P(text=str(value))) + return tc + tc = TableCell(valuetype="string") + tc.addElement(P(text=str(value))) + return tc diff --git a/table/fairspec_table/plugins/xlsx/actions/file_dialect/__init__.py 
b/table/fairspec_table/plugins/xlsx/actions/file_dialect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer.py b/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer.py new file mode 100644 index 0000000..a4381e2 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_dataset import load_file +from fairspec_metadata import ( + OdsFileDialect, + XlsxFileDialect, + get_data_first_path, + get_supported_file_dialect, +) + +from fairspec_table.plugins.xlsx.actions.buffer.decode import decode_xlsx_buffer +from fairspec_table.utils.sniffer.sniffer import Sniffer + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + from fairspec_table.models.table import LoadTableOptions + + +def infer_xlsx_file_dialect( + resource: Resource, + options: LoadTableOptions | None = None, +) -> XlsxFileDialect | OdsFileDialect | None: + data_path = get_data_first_path(resource) + if not data_path: + return None + + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + return None + + format: str = getattr(file_dialect, "format", "xlsx") + + try: + buffer = load_file(data_path) + except Exception: + return _make_dialect(format) + + try: + sheet_name: str | None = getattr(file_dialect, "sheetName", None) + sheet_number: int | None = getattr(file_dialect, "sheetNumber", None) + rows = decode_xlsx_buffer( + buffer, + format=format, + sheet_name=sheet_name, + sheet_number=sheet_number, + ) + except Exception: + return _make_dialect(format) + + if not rows: + return _make_dialect(format) + + sniffer = Sniffer() + try: + detection = sniffer.sniff_rows(rows[:100]) + except Exception: + return _make_dialect(format) + + header_rows: list[int] | bool | None = None + if detection.dialect.header.has_header_row: + header_rows = 
[detection.dialect.header.num_preamble_rows + 1] + elif detection.num_fields > 0: + header_rows = False + + return _make_dialect(format, header_rows) + + +def _make_dialect( + format: str, + header_rows: list[int] | bool | None = None, +) -> XlsxFileDialect | OdsFileDialect: + kwargs: dict[str, object] = {} + if header_rows is not None: + kwargs["headerRows"] = header_rows + if format == "ods": + return OdsFileDialect(**kwargs) # type: ignore[arg-type] + return XlsxFileDialect(**kwargs) # type: ignore[arg-type] diff --git a/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer_spec.py b/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer_spec.py new file mode 100644 index 0000000..c644539 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/file_dialect/infer_spec.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from io import BytesIO + +from fairspec_dataset import write_temp_file +from fairspec_metadata import CsvFileDialect, OdsFileDialect, Resource, XlsxFileDialect + +from fairspec_table.plugins.xlsx.actions.buffer.encode import encode_xlsx_buffer + +from .infer import infer_xlsx_file_dialect + + +def _create_xlsx_file(rows: list[list[object]]) -> str: + buffer = encode_xlsx_buffer(rows, book_type="xlsx") + return write_temp_file(buffer, format="xlsx") + + +class TestInferXlsxFileDialect: + def test_should_detect_header_row_with_text_headers(self): + path = _create_xlsx_file( + [ + ["id", "name", "age"], + [1, "Alice", 25], + [2, "Bob", 30], + ] + ) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect(headerRows=[1]) + + def test_should_not_detect_header_when_first_row_is_numeric(self): + path = _create_xlsx_file( + [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + ] + ) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect(headerRows=False) + + def test_should_detect_header_with_mixed_case_and_underscores(self): + path = 
_create_xlsx_file( + [ + ["user_id", "User_Name", "EmailAddress"], + [1, "alice", "alice@example.com"], + [2, "bob", "bob@example.com"], + ] + ) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect(headerRows=[1]) + + def test_should_not_detect_header_with_boolean_values_in_first_row(self): + path = _create_xlsx_file( + [ + ["someId", 37257, 695.8, False, "2024-01-01"], + ["anotherId", 68694, 337.73, True, "2024-01-02"], + ["thirdId", 52019, 988.74, False, "2024-01-03"], + ] + ) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect(headerRows=False) + + def test_should_handle_single_row_files(self): + path = _create_xlsx_file([["id", "name", "age"]]) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect(headerRows=False) + + def test_should_handle_empty_files(self): + path = _create_xlsx_file([]) + + file_dialect = infer_xlsx_file_dialect(Resource(data=path)) + + assert file_dialect == XlsxFileDialect() + + def test_should_return_none_for_incompatible_format(self): + resource = Resource(data="test.csv", fileDialect=CsvFileDialect()) + + file_dialect = infer_xlsx_file_dialect(resource) + + assert file_dialect is None + + def test_should_return_none_for_resources_without_path(self): + resource = Resource(data=[{"id": 1, "name": "alice"}]) + + file_dialect = infer_xlsx_file_dialect(resource) + + assert file_dialect is None + + def test_should_respect_sheet_number_from_existing_dialect(self): + import openpyxl + + wb = openpyxl.Workbook() + wb.remove(wb.active) + ws1 = wb.create_sheet("Sheet1") + for row in [[1, 2, 3], [4, 5, 6]]: + ws1.append(row) + ws2 = wb.create_sheet("Sheet2") + for row in [["id", "name"], [1, "Alice"], [2, "Bob"]]: + ws2.append(row) + buf = BytesIO() + wb.save(buf) + path = write_temp_file(buf.getvalue()) + + file_dialect = infer_xlsx_file_dialect( + Resource(data=path, 
fileDialect=XlsxFileDialect(sheetNumber=2)) + ) + + assert file_dialect == XlsxFileDialect(headerRows=[1]) + + def test_should_respect_sheet_name_from_existing_dialect(self): + import openpyxl + + wb = openpyxl.Workbook() + wb.remove(wb.active) + ws1 = wb.create_sheet("Data") + ws1.append([1, 2, 3]) + ws2 = wb.create_sheet("Headers") + for row in [["id", "name"], [1, "Alice"]]: + ws2.append(row) + buf = BytesIO() + wb.save(buf) + path = write_temp_file(buf.getvalue()) + + file_dialect = infer_xlsx_file_dialect( + Resource(data=path, fileDialect=XlsxFileDialect(sheetName="Headers")) + ) + + assert file_dialect == XlsxFileDialect(headerRows=[1]) + + def test_should_support_ods_format(self): + buffer = encode_xlsx_buffer( + [["id", "name", "age"], [1, "Alice", 25], [2, "Bob", 30]], + book_type="ods", + ) + path = write_temp_file(buffer) + + file_dialect = infer_xlsx_file_dialect( + Resource(data=path, fileDialect=OdsFileDialect()) + ) + + assert file_dialect == OdsFileDialect(headerRows=[1]) diff --git a/table/fairspec_table/plugins/xlsx/actions/table/__init__.py b/table/fairspec_table/plugins/xlsx/actions/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/plugins/xlsx/actions/table/load.py b/table/fairspec_table/plugins/xlsx/actions/table/load.py new file mode 100644 index 0000000..c9c9000 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/table/load.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack + +import polars as pl + +from fairspec_dataset import load_file, prefetch_files +from fairspec_metadata import Resource, get_supported_file_dialect, resolve_table_schema + +from fairspec_table.actions.data.file_dialect import get_records_from_rows +from fairspec_table.actions.table.normalize import normalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.plugins.xlsx.actions.buffer.decode import 
decode_xlsx_buffer +from fairspec_table.plugins.xlsx.actions.file_dialect.infer import ( + infer_xlsx_file_dialect, +) + +if TYPE_CHECKING: + from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + + from fairspec_table.models.table import LoadTableOptions, Table + + +def load_xlsx_table( + resource: Resource, **options: Unpack[LoadTableOptions] +) -> Table: + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + raise Exception("Resource data is not compatible") + + paths = prefetch_files(resource) + if not paths: + raise Exception("Resource path is not defined") + + if _dialect_has_only_format(file_dialect): + inferred = infer_xlsx_file_dialect( + resource.model_copy(update={"data": paths[0]}) + ) + if inferred: + file_dialect = inferred + + format = getattr(file_dialect, "format", "xlsx") + sheet_name = getattr(file_dialect, "sheetName", None) + sheet_number = getattr(file_dialect, "sheetNumber", None) + + tables: list[Table] = [] + for path in paths: + buffer = load_file(path) + rows = decode_xlsx_buffer( + buffer, + format=format, + sheet_name=sheet_name, + sheet_number=sheet_number, + ) + if rows: + records = get_records_from_rows(rows, file_dialect) # type: ignore[arg-type] + table = pl.DataFrame(records).lazy() + tables.append(table) + + result = pl.concat(tables) + + if not options.get("denormalized"): + table_schema = resolve_table_schema(resource.tableSchema) + if not table_schema: + table_schema = infer_table_schema_from_table(result, **options) + result = normalize_table(result, table_schema) + + return result + + +def _dialect_has_only_format(dialect: FileDialect) -> bool: + keys = { + k + for k in type(dialect).model_fields + if getattr(dialect, k, None) is not None + } + meaningful = keys - {"format", "type", "title", "description"} + return len(meaningful) == 0 diff --git a/table/fairspec_table/plugins/xlsx/actions/table/load_spec.py 
b/table/fairspec_table/plugins/xlsx/actions/table/load_spec.py new file mode 100644 index 0000000..8bdcc2f --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/table/load_spec.py @@ -0,0 +1,294 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import OdsFileDialect, Resource, XlsxFileDialect +from .load import load_xlsx_table +from .test import write_test_data + +ROW1 = ["id", "name"] +ROW2 = [1, "english"] +ROW3 = [2, "中文"] + +RECORD1 = {"id": 1, "name": "english"} +RECORD2 = {"id": 2, "name": "中文"} + + +class TestLoadXlsxTableXlsx: + def test_should_load_local_file(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table(Resource(data=path, fileDialect=XlsxFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_load_local_file_multipart(self): + path1 = get_temp_file_path() + path2 = get_temp_file_path() + write_test_data(path1, [ROW1, ROW2, ROW3]) + write_test_data(path2, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table( + Resource(data=[path1, path2], fileDialect=XlsxFileDialect()) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, RECORD1, RECORD2] + + def test_should_support_sheet_number(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], sheet_number=2) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(sheetNumber=2)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_support_sheet_name(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], sheet_name="Sheet2") + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(sheetName="Sheet2")) + ) 
+ frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_support_no_header(self): + path = get_temp_file_path() + write_test_data(path, [ROW2, ROW3]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(headerRows=False)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"column1": 1, "column2": "english"}, + {"column1": 2, "column2": "中文"}, + ] + + def test_should_support_header_rows_offset(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(headerRows=[2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"1": 2, "english": "中文"}] + + def test_should_support_multiline_header_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(headerRows=[1, 2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"id 1": 2, "name english": "中文"}] + + def test_should_support_header_join(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table( + Resource( + data=path, + fileDialect=XlsxFileDialect(headerRows=[1, 2], headerJoin="-"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"id-1": 2, "name-english": "中文"}] + + def test_should_support_comment_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(commentRows=[2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD2] + + def 
test_should_support_comment_prefix(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, ["#comment"]]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_handle_longer_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, [3, "german", "bad"]]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, {"id": 3, "name": "german"}] + + def test_should_handle_shorter_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, [3]]) + + table = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, {"id": 3, "name": None}] + + +class TestLoadXlsxTableOds: + def test_should_load_local_file(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table(Resource(data=path, fileDialect=OdsFileDialect())) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_load_local_file_multipart(self): + path1 = get_temp_file_path() + path2 = get_temp_file_path() + write_test_data(path1, [ROW1, ROW2, ROW3], format="ods") + write_test_data(path2, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource(data=[path1, path2], fileDialect=OdsFileDialect()) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, RECORD1, RECORD2] + + def 
test_should_support_sheet_number(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], sheet_number=2, format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(sheetNumber=2)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_support_sheet_name(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], sheet_name="Sheet2", format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(sheetName="Sheet2")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_support_no_header(self): + path = get_temp_file_path() + write_test_data(path, [ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(headerRows=False)) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + {"column1": 1, "column2": "english"}, + {"column1": 2, "column2": "中文"}, + ] + + def test_should_support_header_rows_offset(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(headerRows=[2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"1": 2, "english": "中文"}] + + def test_should_support_multiline_header_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(headerRows=[1, 2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"id 1": 2, "name english": "中文"}] + + def test_should_support_header_join(self): + path = get_temp_file_path() + 
write_test_data(path, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource( + data=path, + fileDialect=OdsFileDialect(headerRows=[1, 2], headerJoin="-"), + ) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [{"id-1": 2, "name-english": "中文"}] + + def test_should_support_comment_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(commentRows=[2])) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD2] + + def test_should_support_comment_prefix(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, ["#comment"]], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2] + + def test_should_handle_longer_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, [3, "german", "bad"]], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, {"id": 3, "name": "german"}] + + def test_should_handle_shorter_rows(self): + path = get_temp_file_path() + write_test_data(path, [ROW1, ROW2, ROW3, [3]], format="ods") + + table = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect(commentPrefix="#")) + ) + frame: pl.DataFrame = table.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [RECORD1, RECORD2, {"id": 3, "name": None}] diff --git a/table/fairspec_table/plugins/xlsx/actions/table/save.py b/table/fairspec_table/plugins/xlsx/actions/table/save.py new file mode 100644 
index 0000000..266921a --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/table/save.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +import polars as pl + +from fairspec_dataset import assert_local_path_vacant, save_file +from fairspec_metadata import Resource, TableSchema, get_supported_file_dialect + +from fairspec_table.actions.table.denormalize import denormalize_table +from fairspec_table.actions.table_schema.infer import infer_table_schema_from_table +from fairspec_table.plugins.xlsx.actions.buffer.encode import encode_xlsx_buffer +from fairspec_table.plugins.xlsx.settings import NATIVE_TYPES + +if TYPE_CHECKING: + from fairspec_table.models.table import SaveTableOptions, Table + + +def save_xlsx_table(table: Table, **options: Unpack[SaveTableOptions]) -> str: + path = options["path"] + + if not options.get("overwrite"): + assert_local_path_vacant(path) + + resource = Resource(data=path, fileDialect=options.get("fileDialect")) + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + raise Exception("Saving options is not compatible") + + table_schema = options.get("tableSchema") + if not isinstance(table_schema, TableSchema): + table_schema = infer_table_schema_from_table( + table, **options, keepStrings=True + ) + + table = denormalize_table(table, table_schema, nativeTypes=NATIVE_TYPES) + + frame = cast("pl.DataFrame", table.collect()) + sheet_name = getattr(file_dialect, "sheetName", None) or "Sheet1" + format = getattr(file_dialect, "format", "xlsx") + book_type = "ods" if format == "ods" else "xlsx" + + records = frame.to_dicts() + rows: list[list[object]] = [] + if records: + rows.append(list(records[0].keys())) + for record in records: + rows.append(list(record.values())) + + buffer = encode_xlsx_buffer(rows, sheet_name=sheet_name, book_type=book_type) + save_file(path, buffer, overwrite=bool(options.get("overwrite"))) + + return path diff 
--git a/table/fairspec_table/plugins/xlsx/actions/table/save_spec.py b/table/fairspec_table/plugins/xlsx/actions/table/save_spec.py new file mode 100644 index 0000000..ac26da2 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/table/save_spec.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import polars as pl +from fairspec_dataset import get_temp_file_path +from fairspec_metadata import OdsFileDialect, Resource, XlsxFileDialect + +from .load import load_xlsx_table +from .save import save_xlsx_table +from .test import read_test_data + +ROW1 = {"id": 1, "name": "english"} +ROW2 = {"id": 2, "name": "中文"} +TABLE = pl.DataFrame([ROW1, ROW2]).lazy() + + +class TestSaveXlsxTableXlsx: + def test_should_save_table_to_file(self): + path = get_temp_file_path() + + save_xlsx_table(TABLE, path=path, fileDialect=XlsxFileDialect()) + + data = read_test_data(path) + assert data == [ROW1, ROW2] + + def test_should_save_and_load_various_data_types(self): + path = get_temp_file_path(format="xlsx") + + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", ["2025-01-01"], dtype=pl.String), + pl.Series("datetime", ["2025-01-01T00:00:00"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_xlsx_table(source, path=path, fileDialect=XlsxFileDialect()) + + target = load_xlsx_table( + Resource(data=path, fileDialect=XlsxFileDialect()), + denormalized=True, + ) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": True, + "date": "2025-01-01", + "datetime": "2025-01-01T00:00:00", + "integer": 1, + "number": 1.1, + "string": "string", + } + ] + + +class TestSaveXlsxTableOds: + def test_should_save_table_to_file(self): + path = get_temp_file_path() + + save_xlsx_table(TABLE, path=path, fileDialect=OdsFileDialect()) + + data = 
read_test_data(path) + assert data == [ROW1, ROW2] + + def test_should_save_and_load_various_data_types(self): + path = get_temp_file_path(format="ods") + + source = pl.DataFrame( + [ + pl.Series("boolean", [True], dtype=pl.Boolean), + pl.Series("date", ["2025-01-01"], dtype=pl.String), + pl.Series("datetime", ["2025-01-01T00:00:00"], dtype=pl.String), + pl.Series("integer", [1], dtype=pl.Int32), + pl.Series("number", [1.1], dtype=pl.Float64), + pl.Series("string", ["string"], dtype=pl.String), + ] + ).lazy() + + save_xlsx_table(source, path=path, fileDialect=OdsFileDialect()) + + target = load_xlsx_table( + Resource(data=path, fileDialect=OdsFileDialect()), + denormalized=True, + ) + frame: pl.DataFrame = target.collect() # ty: ignore[invalid-assignment] + + assert frame.to_dicts() == [ + { + "boolean": True, + "date": "2025-01-01", + "datetime": "2025-01-01T00:00:00", + "integer": 1, + "number": 1.1, + "string": "string", + } + ] diff --git a/table/fairspec_table/plugins/xlsx/actions/table/test.py b/table/fairspec_table/plugins/xlsx/actions/table/test.py new file mode 100644 index 0000000..85a7644 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/actions/table/test.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +from io import BytesIO + + +def read_test_data(path: str) -> list[dict[str, object]]: + try: + return _read_xlsx_test_data(path) + except Exception: + return _read_ods_test_data(path) + + +def write_test_data( + path: str, + rows: list[list[object]], + *, + sheet_number: int = 1, + sheet_name: str | None = None, + format: str = "xlsx", +) -> None: + if format == "ods": + _write_ods_test_data( + path, rows, sheet_number=sheet_number, sheet_name=sheet_name + ) + else: + _write_xlsx_test_data( + path, rows, sheet_number=sheet_number, sheet_name=sheet_name + ) + + +def _read_xlsx_test_data(path: str) -> list[dict[str, object]]: + import openpyxl + + with open(path, "rb") as f: + data = f.read() + wb = openpyxl.load_workbook(BytesIO(data), 
data_only=True) + ws = wb[wb.sheetnames[0]] + all_rows = list(ws.iter_rows(values_only=True)) + if not all_rows: + return [] + + headers = [str(h) for h in all_rows[0]] + records: list[dict[str, object]] = [] + for row in all_rows[1:]: + record: dict[str, object] = {} + for i, h in enumerate(headers): + record[h] = row[i] if i < len(row) else None + records.append(record) + return records + + +def _read_ods_test_data(path: str) -> list[dict[str, object]]: + from odf.opendocument import load as odf_load + from odf.table import Table as OdfTable + from odf.table import TableCell, TableRow + from odf.text import P + + OFFICENS = "urn:oasis:names:tc:opendocument:xmlns:office:1.0" + TABLENS = "urn:oasis:names:tc:opendocument:xmlns:table:1.0" + + doc = odf_load(path) + sheets = doc.spreadsheet.getElementsByType(OdfTable) + if not sheets: + return [] + + sheet = sheets[0] + all_rows: list[list[object]] = [] + + for table_row in sheet.getElementsByType(TableRow): + row: list[object] = [] + for cell in table_row.getElementsByType(TableCell): + repeat_str = cell.getAttrNS(TABLENS, "number-columns-repeated") + repeat = int(repeat_str) if repeat_str else 1 + value_type = cell.getAttrNS(OFFICENS, "value-type") + value: object = None + if value_type == "float": + val = cell.getAttrNS(OFFICENS, "value") + float_val = float(val) + value = int(float_val) if float_val == int(float_val) else float_val + elif value_type == "boolean": + value = cell.getAttrNS(OFFICENS, "boolean-value") == "true" + elif value_type == "string": + ps = cell.getElementsByType(P) + value = str(ps[0]) if ps else "" + row.extend([value] * min(repeat, 10000)) + + while row and row[-1] is None: + row.pop() + all_rows.append(row) + + while all_rows and not all_rows[-1]: + all_rows.pop() + + if not all_rows: + return [] + + headers = [str(h) for h in all_rows[0]] + records: list[dict[str, object]] = [] + for row in all_rows[1:]: + record: dict[str, object] = {} + for i, h in enumerate(headers): + record[h] = 
row[i] if i < len(row) else None + records.append(record) + return records + + +def _write_xlsx_test_data( + path: str, + rows: list[list[object]], + *, + sheet_number: int, + sheet_name: str | None, +) -> None: + import openpyxl + + wb = openpyxl.Workbook() + wb.remove(wb.active) + + for i in range(sheet_number): + name = sheet_name or f"Sheet{i + 1}" + ws = wb.create_sheet(title=name) + for row in rows: + ws.append(row) + + wb.save(path) + + +def _write_ods_test_data( + path: str, + rows: list[list[object]], + *, + sheet_number: int, + sheet_name: str | None, +) -> None: + from odf.opendocument import OpenDocumentSpreadsheet + from odf.table import Table, TableRow + + from fairspec_table.plugins.xlsx.actions.buffer.encode import _create_ods_cell + + doc = OpenDocumentSpreadsheet() + + for i in range(sheet_number): + name = sheet_name or f"Sheet{i + 1}" + table = Table(name=name) + for row_data in rows: + tr = TableRow() + for value in row_data: + tc = _create_ods_cell(value) + tr.addElement(tc) + table.addElement(tr) + doc.spreadsheet.addElement(table) + + doc.save(path) diff --git a/table/fairspec_table/plugins/xlsx/plugin.py b/table/fairspec_table/plugins/xlsx/plugin.py new file mode 100644 index 0000000..13b22a6 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/plugin.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Unpack, cast + +from fairspec_metadata import Resource, get_supported_file_dialect +from fairspec_metadata.models.file_dialect.file_dialect import FileDialect + +from fairspec_table.plugin import TablePlugin + +from .actions.file_dialect.infer import infer_xlsx_file_dialect +from .actions.table.load import load_xlsx_table +from .actions.table.save import save_xlsx_table + +if TYPE_CHECKING: + from fairspec_dataset.models.file_dialect import InferFileDialectOptions + + from fairspec_table.models.table import LoadTableOptions, SaveTableOptions, Table + + +class XlsxPlugin(TablePlugin): + def load_table( 
+ self, + resource: Resource, + **options: Unpack[LoadTableOptions], + ) -> Table | None: + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + return None + + return load_xlsx_table( + resource.model_copy(update={"fileDialect": file_dialect}), **options + ) + + def save_table(self, table: Table, **options: Unpack[SaveTableOptions]) -> str | None: + resource = Resource( + data=options["path"], fileDialect=cast(FileDialect | None, options.get("fileDialect")) + ) + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + return None + return save_xlsx_table(table, **options) + + def infer_file_dialect( + self, + resource: Resource, + **options: Unpack[InferFileDialectOptions], + ) -> FileDialect | None: + file_dialect = get_supported_file_dialect(resource, ["xlsx", "ods"]) + if not file_dialect: + return None + return infer_xlsx_file_dialect(resource) diff --git a/table/fairspec_table/plugins/xlsx/plugin_spec.py b/table/fairspec_table/plugins/xlsx/plugin_spec.py new file mode 100644 index 0000000..a576564 --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/plugin_spec.py @@ -0,0 +1,197 @@ +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import polars as pl +from fairspec_metadata import OdsFileDialect, Resource, XlsxFileDialect + +from .plugin import XlsxPlugin + + +class TestXlsxPluginLoadTable: + def setup_method(self): + self.plugin = XlsxPlugin() + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_load_table_from_xlsx_file(self, mock_load: MagicMock): + resource = Resource(data="test.xlsx") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_handle_explicit_xlsx_format(self, mock_load: MagicMock): + resource = Resource(data="test.txt", 
fileDialect=XlsxFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_pass_through_load_options(self, mock_load: MagicMock): + resource = Resource(data="test.xlsx") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_handle_paths_with_directories(self, mock_load: MagicMock): + resource = Resource(data="/path/to/data.xlsx") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_load_table_from_ods_file(self, mock_load: MagicMock): + resource = Resource(data="test.ods") + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_handle_explicit_ods_format(self, mock_load: MagicMock): + resource = Resource(data="test.txt", fileDialect=OdsFileDialect()) + mock_table = pl.DataFrame().lazy() + mock_load.return_value = mock_table + + result = self.plugin.load_table(resource) + + assert result is mock_table + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_pass_through_ods_load_options(self, mock_load: MagicMock): + resource = Resource(data="test.ods") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource, denormalized=True) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_handle_ods_paths_with_directories(self, mock_load: MagicMock): + resource = 
Resource(data="/path/to/data.ods") + mock_load.return_value = pl.DataFrame().lazy() + + self.plugin.load_table(resource) + + mock_load.assert_called_once() + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_return_none_for_non_xlsx_ods_files(self, mock_load: MagicMock): + resource = Resource(data="test.csv") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.xlsx.plugin.load_xlsx_table") + def test_should_return_none_for_json_files(self, mock_load: MagicMock): + resource = Resource(data="test.json") + + result = self.plugin.load_table(resource) + + mock_load.assert_not_called() + assert result is None + + +class TestXlsxPluginSaveTable: + def setup_method(self): + self.plugin = XlsxPlugin() + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_save_table_to_xlsx_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.xlsx" + + result = self.plugin.save_table(table, path="output.xlsx") + + mock_save.assert_called_once_with(table, path="output.xlsx") + assert result == "output.xlsx" + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_handle_explicit_xlsx_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", fileDialect=XlsxFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=XlsxFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_handle_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.xlsx" + + self.plugin.save_table(table, path="/path/to/output.xlsx") + + mock_save.assert_called_once_with(table, path="/path/to/output.xlsx") + + 
@patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_save_table_to_ods_file(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.ods" + + result = self.plugin.save_table(table, path="output.ods") + + mock_save.assert_called_once_with(table, path="output.ods") + assert result == "output.ods" + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_handle_explicit_ods_format(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "output.txt" + + result = self.plugin.save_table(table, path="output.txt", fileDialect=OdsFileDialect()) + + mock_save.assert_called_once_with(table, path="output.txt", fileDialect=OdsFileDialect()) + assert result == "output.txt" + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_handle_ods_paths_with_directories(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + mock_save.return_value = "/path/to/output.ods" + + self.plugin.save_table(table, path="/path/to/output.ods") + + mock_save.assert_called_once_with(table, path="/path/to/output.ods") + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_return_none_for_non_xlsx_ods_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.csv") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_return_none_for_files_without_extension(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output") + + mock_save.assert_not_called() + assert result is None + + @patch("fairspec_table.plugins.xlsx.plugin.save_xlsx_table") + def test_should_return_none_for_json_files(self, mock_save: MagicMock): + table = pl.DataFrame().lazy() + + result = self.plugin.save_table(table, path="output.json") + + 
mock_save.assert_not_called() + assert result is None diff --git a/table/fairspec_table/plugins/xlsx/settings.py b/table/fairspec_table/plugins/xlsx/settings.py new file mode 100644 index 0000000..8e67e1d --- /dev/null +++ b/table/fairspec_table/plugins/xlsx/settings.py @@ -0,0 +1 @@ +NATIVE_TYPES = ["boolean", "integer", "number", "string"] diff --git a/table/fairspec_table/py.typed b/table/fairspec_table/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/schemas/__init__.py b/table/fairspec_table/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/schemas/geojson.json b/table/fairspec_table/schemas/geojson.json new file mode 100644 index 0000000..3ca9e08 --- /dev/null +++ b/table/fairspec_table/schemas/geojson.json @@ -0,0 +1,216 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Geo JSON object", + "description": "Schema for a Geo JSON object", + "type": "object", + "required": ["type"], + "properties": { + "crs": { "$ref": "#/$defs/crs" }, + "bbox": { "$ref": "#/$defs/bbox" } + }, + "oneOf": [ + { "$ref": "#/$defs/geometry" }, + { "$ref": "#/$defs/geometryCollection" }, + { "$ref": "#/$defs/feature" }, + { "$ref": "#/$defs/featureCollection" } + ], + "$defs": { + "geometryCollection": { + "title": "GeometryCollection", + "description": "A collection of geometry objects", + "required": ["geometries"], + "properties": { + "type": { "enum": ["GeometryCollection"] }, + "geometries": { + "type": "array", + "items": { "$ref": "#/$defs/geometry" } + } + } + }, + "feature": { + "title": "Feature", + "description": "A Geo JSON feature object", + "required": ["geometry", "properties"], + "properties": { + "type": { "enum": ["Feature"] }, + "geometry": { + "oneOf": [{ "type": "null" }, { "$ref": "#/$defs/geometry" }] + }, + "properties": { "type": ["object", "null"] }, + "id": { "FIXME": "may be there, type not known (string? 
number?)" } + } + }, + "featureCollection": { + "title": "FeatureCollection", + "description": "A Geo JSON feature collection", + "required": ["features"], + "properties": { + "type": { "enum": ["FeatureCollection"] }, + "features": { + "type": "array", + "items": { "$ref": "#/$defs/feature" } + } + } + }, + "geometry": { + "title": "geometry", + "description": "One geometry as defined by GeoJSON", + "type": "object", + "required": ["type", "coordinates"], + "oneOf": [ + { + "title": "Point", + "properties": { + "type": { "enum": ["Point"] }, + "coordinates": { + "$ref": "#/$defs/geometry/$defs/position" + } + } + }, + { + "title": "MultiPoint", + "properties": { + "type": { "enum": ["MultiPoint"] }, + "coordinates": { + "$ref": "#/$defs/geometry/$defs/positionArray" + } + } + }, + { + "title": "LineString", + "properties": { + "type": { "enum": ["LineString"] }, + "coordinates": { + "$ref": "#/$defs/geometry/$defs/lineString" + } + } + }, + { + "title": "MultiLineString", + "properties": { + "type": { "enum": ["MultiLineString"] }, + "coordinates": { + "type": "array", + "items": { + "$ref": "#/$defs/geometry/$defs/lineString" + } + } + } + }, + { + "title": "Polygon", + "properties": { + "type": { "enum": ["Polygon"] }, + "coordinates": { + "$ref": "#/$defs/geometry/$defs/polygon" + } + } + }, + { + "title": "MultiPolygon", + "properties": { + "type": { "enum": ["MultiPolygon"] }, + "coordinates": { + "type": "array", + "items": { "$ref": "#/$defs/geometry/$defs/polygon" } + } + } + } + ], + "$defs": { + "position": { + "description": "A single position", + "type": "array", + "minItems": 2, + "maxItems": 2, + "prefixItems": [{ "type": "number" }, { "type": "number" }] + }, + "positionArray": { + "description": "An array of positions", + "type": "array", + "items": { "$ref": "#/$defs/geometry/$defs/position" } + }, + "lineString": { + "description": "An array of two or more positions", + "allOf": [ + { "$ref": "#/$defs/geometry/$defs/positionArray" }, + { 
"minItems": 2 } + ] + }, + "linearRing": { + "description": "An array of four positions where the first equals the last", + "allOf": [ + { "$ref": "#/$defs/geometry/$defs/positionArray" }, + { "minItems": 4 } + ] + }, + "polygon": { + "description": "An array of linear rings", + "type": "array", + "items": { "$ref": "#/$defs/geometry/$defs/linearRing" } + } + } + }, + "crs": { + "title": "crs", + "description": "a Coordinate Reference System object", + "type": ["object", "null"], + "required": ["type", "properties"], + "properties": { + "type": { "type": "string" }, + "properties": { "type": "object" } + }, + "additionalProperties": false, + "oneOf": [ + { "$ref": "#/$defs/crs/$defs/namedCrs" }, + { "$ref": "#/$defs/crs/$defs/linkedCrs" } + ], + "$defs": { + "namedCrs": { + "properties": { + "type": { "enum": ["name"] }, + "properties": { + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "FIXME": "semantic validation necessary" + } + } + } + } + }, + "linkedObject": { + "type": "object", + "required": ["href"], + "properties": { + "href": { + "type": "string", + "format": "uri", + "FIXME": "spec says \"dereferenceable\", cannot enforce that" + }, + "type": { + "type": "string", + "description": "Suggested values: proj4, ogjwkt, esriwkt" + } + } + }, + "linkedCrs": { + "properties": { + "type": { "enum": ["link"] }, + "properties": { + "$ref": "#/$defs/crs/$defs/linkedObject" + } + } + } + } + }, + "bbox": { + "description": "A bounding box as defined by GeoJSON", + "FIXME": "unenforceable constraint: even number of elements in array", + "type": "array", + "items": { "type": "number" } + } + } +} diff --git a/table/fairspec_table/schemas/topojson.json b/table/fairspec_table/schemas/topojson.json new file mode 100644 index 0000000..b2dc719 --- /dev/null +++ b/table/fairspec_table/schemas/topojson.json @@ -0,0 +1,259 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "TopoJSON 
object", + "description": "Schema for a TopoJSON object", + "type": "object", + "required": ["type"], + "properties": { + "bbox": { + "$ref": "#/$defs/bbox" + } + }, + "oneOf": [ + { + "$ref": "#/$defs/topology" + }, + { + "$ref": "#/$defs/geometry" + } + ], + "$defs": { + "bbox": { + "title": "TopoJSON bounding box", + "description": "A bounding box as defined by TopoJSON", + "type": "array", + "items": { + "$ref": "#/$defs/bbox/$defs/dimension" + }, + "minItems": 2, + "maxItems": 2, + "$defs": { + "dimension": { + "type": "array", + "description": "This array should have an entry per dimension in the geometries", + "items": { + "type": "number" + } + } + } + }, + "geometry": { + "title": "Geometry objects", + "description": "A Geometry object as defined by TopoJSON", + "type": "object", + "required": ["type"], + "properties": { + "id": { + "type": ["string", "integer"] + }, + "properties": { + "type": "object" + } + }, + "oneOf": [ + { + "title": "Point", + "description": "A Point Geometry object as defined by TopoJSON", + "required": ["type", "coordinates"], + "properties": { + "type": { + "enum": ["Point"] + }, + "coordinates": { + "$ref": "#/$defs/geometry/$defs/position" + } + } + }, + { + "title": "MultiPoint", + "description": "A MultiPoint Geometry object as defined by TopoJSON", + "required": ["type", "coordinates"], + "properties": { + "type": { + "enum": ["MultiPoint"] + }, + "coordinates": { + "type": "array", + "items": { + "$ref": "#/$defs/geometry/$defs/position" + } + } + } + }, + { + "title": "LineString", + "description": "A LineString Geometry object as defined by TopoJSON", + "required": ["type", "arcs"], + "properties": { + "type": { + "enum": ["LineString"] + }, + "arcs": { + "type": "array", + "items": { + "type": "integer" + } + } + } + }, + { + "title": "MultiLineString", + "description": "A MultiLineString Geometry object as defined by TopoJSON", + "required": ["type", "arcs"], + "properties": { + "type": { + "enum": ["MultiLineString"] + 
}, + "arcs": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, + { + "title": "Polygon", + "description": "A Polygon Geometry object as defined by TopoJSON", + "required": ["type", "arcs"], + "properties": { + "type": { + "enum": ["Polygon"] + }, + "arcs": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, + { + "title": "MultiPolygon", + "description": "A MultiPolygon Geometry object as defined by TopoJSON", + "required": ["type", "arcs"], + "properties": { + "type": { + "enum": ["MultiPolygon"] + }, + "arcs": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + } + }, + { + "title": "GeometryCollection", + "description": "A MultiPolygon Geometry object as defined by TopoJSON", + "required": ["type", "geometries"], + "properties": { + "type": { + "enum": ["GeometryCollection"] + }, + "geometries": { + "type": "array", + "items": { + "$ref": "#/$defs/geometry" + } + } + } + } + ], + "$defs": { + "position": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2 + } + } + }, + "topology": { + "title": "Topology", + "description": "A Topology object as defined by TopoJSON", + "type": "object", + "required": ["objects", "arcs"], + "properties": { + "type": { + "enum": ["Topology"] + }, + "objects": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/geometry" + } + }, + "arcs": { + "$ref": "#/$defs/topology/$defs/arcs" + }, + "transform": { + "$ref": "#/$defs/topology/$defs/transform" + }, + "bbox": { + "$ref": "#/$defs/bbox" + } + }, + "$defs": { + "transform": { + "type": "object", + "required": ["scale", "translate"], + "properties": { + "scale": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2 + }, + "translate": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2 + } + } + }, + "arcs": { + 
"type": "array", + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/$defs/topology/$defs/position" + }, + { + "type": "null" + } + ] + }, + "minItems": 2 + } + }, + "position": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2 + } + } + } + } +} diff --git a/table/fairspec_table/settings.py b/table/fairspec_table/settings.py new file mode 100644 index 0000000..ac74a6d --- /dev/null +++ b/table/fairspec_table/settings.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +NUMBER_COLUMN_NAME = "fairspec:number" +ERROR_COLUMN_NAME = "fairspec:error" + +BASE64_REGEX = ( + r"^$|^(?:[0-9a-zA-Z+/]{4})*(?:(?:[0-9a-zA-Z+/]{2}==)|(?:[0-9a-zA-Z+/]{3}=))?$" +) +HEX_REGEX = r"^[0-9a-fA-F]*$" +RFC5322_EMAIL_REGEX = r'^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$' +URL_REGEX = r"^https?://.+" diff --git a/table/fairspec_table/utils/__init__.py b/table/fairspec_table/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/table/fairspec_table/utils/sniffer/__init__.py b/table/fairspec_table/utils/sniffer/__init__.py new file mode 100644 index 0000000..739243f --- /dev/null +++ b/table/fairspec_table/utils/sniffer/__init__.py @@ -0,0 +1,45 @@ +from .error import EncodingError, ParseError, SnifferError +from .metadata import Dialect, Header, LineTerminator, Metadata, Quote +from .potential_dialects import ( + DELIMITERS, + QUOTE_CHARS, + PotentialDialect, + detect_line_terminator, + generate_potential_dialects, +) +from .sample import SampleSize, SampleSizeType +from .score import ( + DialectScore, + FindBestDialectPreferences, + find_best_dialect, + score_dialect, +) +from .sniffer import Sniffer +from .table import Table +from .uniformity import calculate_tau0, calculate_tau1 + +__all__ = [ + "DELIMITERS", + "Dialect", + "DialectScore", + "EncodingError", + "FindBestDialectPreferences", + "Header", + 
"LineTerminator", + "Metadata", + "ParseError", + "PotentialDialect", + "QUOTE_CHARS", + "Quote", + "SampleSize", + "SampleSizeType", + "Sniffer", + "SnifferError", + "Table", + "calculate_tau0", + "calculate_tau1", + "detect_line_terminator", + "find_best_dialect", + "generate_potential_dialects", + "score_dialect", +] diff --git a/table/fairspec_table/utils/sniffer/error.py b/table/fairspec_table/utils/sniffer/error.py new file mode 100644 index 0000000..b795fb6 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/error.py @@ -0,0 +1,13 @@ +from __future__ import annotations + + +class SnifferError(Exception): + pass + + +class EncodingError(SnifferError): + pass + + +class ParseError(SnifferError): + pass diff --git a/table/fairspec_table/utils/sniffer/metadata.py b/table/fairspec_table/utils/sniffer/metadata.py new file mode 100644 index 0000000..fb9e5b9 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/metadata.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from enum import StrEnum + +from fairspec_metadata.models.base import FairspecModel + + +class LineTerminator(StrEnum): + LF = "LF" + CRLF = "CRLF" + CR = "CR" + + +class Quote(FairspecModel): + char: int | None = None + + +class Header(FairspecModel): + has_header_row: bool + num_preamble_rows: int + + +class Dialect(FairspecModel): + delimiter: int + header: Header + quote: Quote + flexible: bool + is_utf8: bool + line_terminator: LineTerminator + + +class Metadata(FairspecModel): + dialect: Dialect + avg_record_len: float + num_fields: int + fields: list[str] diff --git a/table/fairspec_table/utils/sniffer/potential_dialects.py b/table/fairspec_table/utils/sniffer/potential_dialects.py new file mode 100644 index 0000000..7d7be59 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/potential_dialects.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .metadata import LineTerminator, Quote + +DELIMITERS: list[int] 
= [ + 44, # , (comma) + 59, # ; (semicolon) + 9, # \t (tab) + 124, # | (pipe) + 94, # ^ (caret) + 126, # ~ (tilde) + 35, # # (hash) + 38, # & (ampersand) + 167, # § (section) + 47, # / (slash) +] + +QUOTE_CHARS: list[Quote] = [ + Quote(char=None), + Quote(char=34), # " (double quote) + Quote(char=39), # ' (single quote) +] + + +class PotentialDialect(FairspecModel): + delimiter: int + quote: Quote + line_terminator: LineTerminator + + +def detect_line_terminator(data: bytes) -> LineTerminator: + has_cr = False + has_lf = False + has_crlf = False + + i = 0 + while i < len(data): + if data[i] == 13: + if i + 1 < len(data) and data[i + 1] == 10: + has_crlf = True + i += 1 + else: + has_cr = True + elif data[i] == 10: + has_lf = True + i += 1 + + if has_crlf: + return LineTerminator.CRLF + if has_lf: + return LineTerminator.LF + if has_cr: + return LineTerminator.CR + + return LineTerminator.LF + + +def generate_potential_dialects( + line_terminator: LineTerminator, +) -> list[PotentialDialect]: + dialects: list[PotentialDialect] = [] + + for delimiter in DELIMITERS: + for quote in QUOTE_CHARS: + dialects.append( + PotentialDialect( + delimiter=delimiter, + quote=quote, + line_terminator=line_terminator, + ) + ) + + return dialects diff --git a/table/fairspec_table/utils/sniffer/potential_dialects_spec.py b/table/fairspec_table/utils/sniffer/potential_dialects_spec.py new file mode 100644 index 0000000..e697cc3 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/potential_dialects_spec.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from .metadata import LineTerminator +from .potential_dialects import ( + DELIMITERS, + QUOTE_CHARS, + detect_line_terminator, + generate_potential_dialects, +) + + +class TestDetectLineTerminator: + def test_detect_lf(self): + data = "line1\nline2\nline3".encode() + assert detect_line_terminator(data) == LineTerminator.LF + + def test_detect_crlf(self): + data = "line1\r\nline2\r\nline3".encode() + assert 
detect_line_terminator(data) == LineTerminator.CRLF + + def test_detect_cr(self): + data = "line1\rline2\rline3".encode() + assert detect_line_terminator(data) == LineTerminator.CR + + def test_prefer_crlf_over_lf(self): + data = "line1\r\nline2\nline3".encode() + assert detect_line_terminator(data) == LineTerminator.CRLF + + def test_prefer_lf_over_cr(self): + data = "line1\nline2\rline3".encode() + assert detect_line_terminator(data) == LineTerminator.LF + + def test_default_lf_for_empty(self): + assert detect_line_terminator(b"") == LineTerminator.LF + + def test_default_lf_for_single_line(self): + data = "single line without terminator".encode() + assert detect_line_terminator(data) == LineTerminator.LF + + +class TestGeneratePotentialDialects: + def test_generates_all_combinations(self): + dialects = generate_potential_dialects(LineTerminator.LF) + assert len(dialects) == len(DELIMITERS) * len(QUOTE_CHARS) + + def test_uses_provided_line_terminator(self): + dialects = generate_potential_dialects(LineTerminator.CRLF) + assert all(d.line_terminator == "CRLF" for d in dialects) + + def test_includes_all_delimiters(self): + dialects = generate_potential_dialects(LineTerminator.LF) + delimiters = {d.delimiter for d in dialects} + for delimiter in DELIMITERS: + assert delimiter in delimiters + + def test_includes_all_quote_chars(self): + dialects = generate_potential_dialects(LineTerminator.LF) + quotes = [d.quote for d in dialects] + + has_none = any(q.char is None for q in quotes) + has_double_quote = any(q.char == 34 for q in quotes) + has_single_quote = any(q.char == 39 for q in quotes) + + assert has_none + assert has_double_quote + assert has_single_quote + + def test_generates_exactly_30(self): + dialects = generate_potential_dialects(LineTerminator.LF) + assert len(dialects) == 30 + + +class TestDelimitersConstant: + def test_includes_common_delimiters(self): + assert 44 in DELIMITERS + assert 9 in DELIMITERS + assert 59 in DELIMITERS + assert 124 in 
DELIMITERS + + def test_has_10_delimiters(self): + assert len(DELIMITERS) == 10 + + +class TestQuoteCharsConstant: + def test_includes_none(self): + assert any(q.char is None for q in QUOTE_CHARS) + + def test_includes_double_quote(self): + assert any(q.char == 34 for q in QUOTE_CHARS) + + def test_includes_single_quote(self): + assert any(q.char == 39 for q in QUOTE_CHARS) + + def test_has_3_options(self): + assert len(QUOTE_CHARS) == 3 diff --git a/table/fairspec_table/utils/sniffer/sample.py b/table/fairspec_table/utils/sniffer/sample.py new file mode 100644 index 0000000..0c8c06c --- /dev/null +++ b/table/fairspec_table/utils/sniffer/sample.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from enum import StrEnum + +from fairspec_metadata.models.base import FairspecModel + + +class SampleSizeType(StrEnum): + RECORDS = "Records" + BYTES = "Bytes" + ALL = "All" + + +class SampleSize(FairspecModel): + type: SampleSizeType + count: int = 0 diff --git a/table/fairspec_table/utils/sniffer/score.py b/table/fairspec_table/utils/sniffer/score.py new file mode 100644 index 0000000..ea82fa0 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/score.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +from fairspec_metadata.models.base import FairspecModel + +from .metadata import Quote +from .potential_dialects import PotentialDialect +from .table import Table +from .uniformity import calculate_tau0, calculate_tau1 + + +class DialectScore(FairspecModel): + dialect: PotentialDialect + gamma: float + tau0: float + tau1: float + num_fields: int + is_uniform: bool + + +class FindBestDialectPreferences(FairspecModel): + prefer_common_delimiters: bool + prefer_double_quote: bool + + +class _QuoteEvidence(FairspecModel): + quote_density: float + boundary_matches: int + internal_matches: int + + +def score_dialect( + data: bytes, + dialect: PotentialDialect, +) -> DialectScore: + table = Table.parse(data, dialect) + + tau0 = calculate_tau0(table.field_counts) + 
tau1 = calculate_tau1(table.field_counts, table.get_modal_field_count()) + num_fields = table.get_modal_field_count() + is_uniform = table.is_uniform() + num_rows = table.num_rows() + + quote_evidence = _analyze_quote_evidence(data, dialect) + + gamma = _calculate_gamma( + tau0, + tau1, + num_rows, + num_fields, + is_uniform, + dialect.delimiter, + dialect.quote, + quote_evidence, + ) + + return DialectScore( + dialect=dialect, + gamma=gamma, + tau0=tau0, + tau1=tau1, + num_fields=num_fields, + is_uniform=is_uniform, + ) + + +def _calculate_gamma( + tau0: float, + tau1: float, + num_rows: int, + num_fields: int, + is_uniform: bool, + delimiter: int, + quote: Quote, + quote_evidence: _QuoteEvidence, +) -> float: + gamma = tau0 * 0.4 + tau1 * 0.6 + + if is_uniform: + gamma += 0.2 + + if num_fields >= 2 and num_fields <= 50: + gamma += 0.3 + elif num_fields == 1: + gamma -= 1.0 + + if num_rows >= 2: + gamma += 0.05 + + common_delimiters = [44, 9, 59, 124] + if delimiter in common_delimiters: + gamma += 0.15 + + if quote.char is not None: + quote_score = ( + quote_evidence.boundary_matches * 0.5 + + quote_evidence.quote_density * 0.3 + - quote_evidence.internal_matches * 0.2 + ) + + gamma += max(0, min(0.2, quote_score)) + + if quote.char == 34: + gamma += 0.05 + else: + if quote_evidence.quote_density < 0.01: + gamma += 0.1 + + return max(0, min(2, gamma)) + + +def _analyze_quote_evidence( + data: bytes, + dialect: PotentialDialect, +) -> _QuoteEvidence: + if dialect.quote.char is None: + return _QuoteEvidence( + quote_density=0, + boundary_matches=0, + internal_matches=0, + ) + + quote_char = dialect.quote.char + delimiter_char = dialect.delimiter + + quote_count = 0 + boundary_matches = 0 + internal_matches = 0 + + for i in range(len(data)): + if data[i] == quote_char: + quote_count += 1 + + prev_char = data[i - 1] if i > 0 else None + next_char = data[i + 1] if i < len(data) - 1 else None + + at_boundary = ( + prev_char is None + or prev_char == delimiter_char + or 
prev_char == 10 + or prev_char == 13 + or next_char is None + or next_char == delimiter_char + or next_char == 10 + or next_char == 13 + ) + + if at_boundary: + boundary_matches += 1 + else: + internal_matches += 1 + + quote_density = quote_count / len(data) if len(data) > 0 else 0 + + return _QuoteEvidence( + quote_density=quote_density, + boundary_matches=boundary_matches, + internal_matches=internal_matches, + ) + + +def find_best_dialect( + scores: list[DialectScore], + preferences: FindBestDialectPreferences, +) -> DialectScore: + if len(scores) == 0: + raise ValueError("No dialect scores provided") + + best_score = scores[0] + + for score in scores: + current_gamma = score.gamma + best_gamma = best_score.gamma + + if preferences.prefer_common_delimiters: + common_delimiters = [44, 9, 59, 124] + if score.dialect.delimiter in common_delimiters: + current_gamma += 0.05 + if best_score.dialect.delimiter in common_delimiters: + best_gamma += 0.05 + + if preferences.prefer_double_quote: + if score.dialect.quote.char is not None and score.dialect.quote.char == 34: + current_gamma += 0.05 + if ( + best_score.dialect.quote.char is not None + and best_score.dialect.quote.char == 34 + ): + best_gamma += 0.05 + + if current_gamma > best_gamma: + best_score = score + + return best_score diff --git a/table/fairspec_table/utils/sniffer/score_spec.py b/table/fairspec_table/utils/sniffer/score_spec.py new file mode 100644 index 0000000..cc052e1 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/score_spec.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import pytest + +from .metadata import LineTerminator, Quote +from .potential_dialects import PotentialDialect +from .score import FindBestDialectPreferences, find_best_dialect, score_dialect + + +class TestScoreDialect: + def test_comma_delimited_csv_scores_highly(self): + csv = "a,b,c\n1,2,3\n4,5,6" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = 
score_dialect(csv.encode(), dialect) + + assert score.gamma > 0.5 + assert score.is_uniform is True + assert score.num_fields == 3 + + def test_incorrect_delimiter_scores_poorly(self): + csv = "a,b,c\n1,2,3\n4,5,6" + dialect = PotentialDialect( + delimiter=9, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.gamma < 0.5 + assert score.num_fields == 1 + + def test_detect_non_uniform(self): + csv = "a,b\n1,2\n3,4,5" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.is_uniform is False + + def test_calculates_tau0_and_tau1(self): + csv = "a,b,c\n1,2,3\n4,5,6" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.tau0 > 0 + assert score.tau1 > 0 + + def test_handle_quoted_fields(self): + csv = '"a","b,c","d"\n"1","2,3","4"' + dialect = PotentialDialect( + delimiter=44, quote=Quote(char=34), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.num_fields == 3 + assert score.is_uniform is True + + +class TestFindBestDialect: + def test_select_highest_scoring(self): + csv = "a,b,c\n1,2,3\n4,5,6" + data = csv.encode() + + dialects = [ + PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ), + PotentialDialect( + delimiter=9, quote=Quote(), line_terminator=LineTerminator.LF + ), + PotentialDialect( + delimiter=59, quote=Quote(), line_terminator=LineTerminator.LF + ), + ] + + scores = [score_dialect(data, d) for d in dialects] + best = find_best_dialect( + scores, + FindBestDialectPreferences( + prefer_common_delimiters=True, prefer_double_quote=True + ), + ) + + assert best.dialect.delimiter == 44 + + def test_prefer_common_delimiters(self): + csv = "a,b\n1,2\n3,4" + data = csv.encode() + + dialects = [ + 
PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ), + PotentialDialect( + delimiter=94, quote=Quote(), line_terminator=LineTerminator.LF + ), + ] + + scores = [score_dialect(data, d) for d in dialects] + scores[0].gamma = 0.7 + scores[1].gamma = 0.71 + + best = find_best_dialect( + scores, + FindBestDialectPreferences( + prefer_common_delimiters=True, prefer_double_quote=False + ), + ) + + assert best.dialect.delimiter == 44 + + def test_prefer_double_quote(self): + csv = '"a","b"\n"1","2"' + data = csv.encode() + + dialects = [ + PotentialDialect( + delimiter=44, quote=Quote(char=34), line_terminator=LineTerminator.LF + ), + PotentialDialect( + delimiter=44, quote=Quote(char=39), line_terminator=LineTerminator.LF + ), + ] + + scores = [score_dialect(data, d) for d in dialects] + + best = find_best_dialect( + scores, + FindBestDialectPreferences( + prefer_common_delimiters=False, prefer_double_quote=True + ), + ) + + if best.dialect.quote.char is not None: + assert best.dialect.quote.char == 34 + + def test_empty_scores_throws(self): + with pytest.raises(ValueError): + find_best_dialect( + [], + FindBestDialectPreferences( + prefer_common_delimiters=True, prefer_double_quote=True + ), + ) + + def test_single_score(self): + csv = "a,b\n1,2" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + best = find_best_dialect( + [score], + FindBestDialectPreferences( + prefer_common_delimiters=True, prefer_double_quote=True + ), + ) + + assert best is score + + +class TestGammaCalculation: + def test_reward_uniform_tables(self): + uniform_csv = "a,b,c\n1,2,3\n4,5,6\n7,8,9" + non_uniform_csv = "a,b\n1,2,3\n4,5\n6,7,8,9" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + + uniform_score = score_dialect(uniform_csv.encode(), dialect) + non_uniform_score = score_dialect(non_uniform_csv.encode(), 
dialect) + + assert uniform_score.gamma > non_uniform_score.gamma + + def test_reward_reasonable_field_counts(self): + csv_2 = "a,b\n1,2\n3,4" + csv_100 = ( + "a," + "b," * 98 + "z\n" + "1," + "2," * 98 + "3\n" + "4," + "5," * 98 + "6" + ) + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + + score_2 = score_dialect(csv_2.encode(), dialect) + score_100 = score_dialect(csv_100.encode(), dialect) + + assert score_2.num_fields == 2 + assert score_100.num_fields > 50 + + def test_cap_gamma_at_2(self): + csv = "a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,12" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.gamma <= 2 + + def test_gamma_non_negative(self): + csv = "random text without structure" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + score = score_dialect(csv.encode(), dialect) + + assert score.gamma >= 0 diff --git a/table/fairspec_table/utils/sniffer/sniffer.py b/table/fairspec_table/utils/sniffer/sniffer.py new file mode 100644 index 0000000..0bdd8d3 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/sniffer.py @@ -0,0 +1,346 @@ +from __future__ import annotations + +import json +import re +from datetime import datetime + +from .metadata import Dialect, Header, LineTerminator, Metadata, Quote +from .potential_dialects import ( + PotentialDialect, + detect_line_terminator, + generate_potential_dialects, +) +from .sample import SampleSize, SampleSizeType +from .score import FindBestDialectPreferences, find_best_dialect, score_dialect +from .table import Table + + +class Sniffer: + def __init__(self) -> None: + self._sample_size: SampleSize = SampleSize( + type=SampleSizeType.BYTES, count=8192 + ) + self._forced_delimiter: int | None = None + self._forced_quote: Quote | None = None + + def with_sample_size(self, size: SampleSize) -> Sniffer: + 
self._sample_size = size + return self + + def with_delimiter(self, delimiter: int) -> Sniffer: + self._forced_delimiter = delimiter + return self + + def with_quote(self, quote: Quote) -> Sniffer: + self._forced_quote = quote + return self + + def sniff_bytes(self, data: bytes) -> Metadata: + bytes_no_bom = self._skip_bom(data) + current = bytes_no_bom + + without_comment_preamble, comment_preamble_lines = self._skip_preamble(current) + current = without_comment_preamble + + sample = self._take_sample(current) + + line_terminator = detect_line_terminator(sample) + + if self._forced_delimiter is not None: + dialects = self._generate_forced_dialects(line_terminator) + else: + dialects = generate_potential_dialects(line_terminator) + + scores = [score_dialect(sample, dialect) for dialect in dialects] + + best_score = find_best_dialect( + scores, + FindBestDialectPreferences( + prefer_common_delimiters=True, + prefer_double_quote=True, + ), + ) + + dialect = Dialect( + delimiter=best_score.dialect.delimiter, + quote=best_score.dialect.quote, + flexible=not best_score.is_uniform, + is_utf8=True, + line_terminator=best_score.dialect.line_terminator, + header=Header( + has_header_row=False, + num_preamble_rows=comment_preamble_lines, + ), + ) + + structural_preamble_rows = self._detect_structural_preamble(current, dialect) + dialect.header.num_preamble_rows += structural_preamble_rows + + data_after_all_preamble = self._skip_lines(current, structural_preamble_rows) + header_detection_result = self._detect_header(data_after_all_preamble, dialect) + dialect.header.has_header_row = header_detection_result + + return self._build_metadata(bytes_no_bom, dialect) + + def sniff_rows(self, rows: list[list[object]]) -> Metadata: + csv_string = self._rows_to_csv(rows) + data = csv_string.encode("utf-8") + return self.sniff_bytes(data) + + def _value_to_string(self, value: object) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if 
isinstance(value, bool): + return str(value).lower() + if isinstance(value, (int, float)): + return str(value) + if isinstance(value, datetime): + return value.isoformat() + if isinstance(value, (dict, list)): + return json.dumps(value) + return str(value) + + def _escape_field(self, value: str) -> str: + needs_quoting = bool(re.search(r'[,"\n\r]', value)) + + if not needs_quoting: + return value + + escaped = value.replace('"', '""') + return f'"{escaped}"' + + def _rows_to_csv(self, rows: list[list[object]]) -> str: + if len(rows) == 0: + return "" + + lines: list[str] = [] + + for row in rows: + values = [self._escape_field(self._value_to_string(value)) for value in row] + lines.append(",".join(values)) + + return "\n".join(lines) + + def _skip_bom(self, data: bytes) -> bytes: + if len(data) >= 3 and data[0] == 0xEF and data[1] == 0xBB and data[2] == 0xBF: + return data[3:] + return data + + def _skip_preamble(self, data: bytes) -> tuple[bytes, int]: + line_count = 0 + i = 0 + + while i < len(data): + if data[i] == 35: + while i < len(data) and data[i] != 10 and data[i] != 13: + i += 1 + + if ( + i < len(data) + and data[i] == 13 + and i + 1 < len(data) + and data[i + 1] == 10 + ): + i += 2 + elif i < len(data): + i += 1 + + line_count += 1 + else: + break + + return data[i:], line_count + + def _take_sample(self, data: bytes) -> bytes: + if self._sample_size.type == SampleSizeType.ALL: + return data + + if self._sample_size.type == SampleSizeType.BYTES: + return data[: self._sample_size.count] + + line_count = 0 + i = 0 + + while i < len(data) and line_count < self._sample_size.count: + if data[i] == 10: + line_count += 1 + elif data[i] == 13: + if i + 1 < len(data) and data[i + 1] == 10: + i += 1 + line_count += 1 + i += 1 + + return data[:i] + + def _generate_forced_dialects( + self, + line_terminator: LineTerminator, + ) -> list[PotentialDialect]: + if self._forced_delimiter is None: + raise ValueError("generateForcedDialects called without forcedDelimiter") 
+ + delimiter = self._forced_delimiter + + if self._forced_quote is not None: + quotes: list[Quote] = [self._forced_quote] + else: + quotes = [ + Quote(char=None), + Quote(char=34), + Quote(char=39), + ] + + return [ + PotentialDialect( + delimiter=delimiter, + quote=quote, + line_terminator=line_terminator, + ) + for quote in quotes + ] + + def _detect_structural_preamble( + self, + data: bytes, + dialect: Dialect, + ) -> int: + table = Table.parse( + data, + PotentialDialect( + delimiter=dialect.delimiter, + quote=dialect.quote, + line_terminator=dialect.line_terminator, + ), + ) + + if table.num_rows() < 2: + return 0 + + modal_field_count = table.get_modal_field_count() + preamble_rows = 0 + + for i in range(len(table.field_counts)): + count = table.field_counts[i] + if count == modal_field_count: + break + preamble_rows += 1 + + if preamble_rows >= table.num_rows(): + return 0 + + return preamble_rows + + def _skip_lines(self, data: bytes, num_lines: int) -> bytes: + if num_lines == 0: + return data + + line_count = 0 + i = 0 + + while i < len(data) and line_count < num_lines: + if data[i] == 10: + line_count += 1 + i += 1 + elif data[i] == 13: + if i + 1 < len(data) and data[i + 1] == 10: + i += 2 + else: + i += 1 + line_count += 1 + else: + i += 1 + + return data[i:] + + def _detect_header( + self, + data: bytes, + dialect: Dialect, + ) -> bool: + table = Table.parse( + data, + PotentialDialect( + delimiter=dialect.delimiter, + quote=dialect.quote, + line_terminator=dialect.line_terminator, + ), + ) + + if table.num_rows() < 2: + return False + + first_row = table.rows[0] + if not first_row: + return False + + header_score = 0.0 + + for field in first_row: + trimmed = field.strip() + + if len(trimmed) == 0: + header_score -= 0.2 + continue + + if re.fullmatch(r"(?i)^(true|false)$", trimmed): + header_score -= 0.3 + continue + + if re.fullmatch(r"^[a-zA-Z_][a-zA-Z0-9_]*$", trimmed): + header_score += 0.3 + + if re.search(r"[A-Z]", trimmed) and not 
re.fullmatch(r"^\d+$", trimmed): + header_score += 0.2 + + if re.search(r"[_\s-]", trimmed): + header_score += 0.1 + + if re.fullmatch(r"^\d+(\.\d+)?$", trimmed): + header_score -= 0.3 + + return header_score > 0 + + def _build_metadata(self, data: bytes, dialect: Dialect) -> Metadata: + data_after_preamble = self._skip_lines(data, dialect.header.num_preamble_rows) + + table = Table.parse( + data_after_preamble, + PotentialDialect( + delimiter=dialect.delimiter, + quote=dialect.quote, + line_terminator=dialect.line_terminator, + ), + ) + + if dialect.header.has_header_row and len(table.field_counts) > 0: + data_field_counts = table.field_counts[1:] + else: + data_field_counts = table.field_counts + + is_uniform = len(data_field_counts) == 0 or all( + count == data_field_counts[0] for count in data_field_counts + ) + + dialect.flexible = not is_uniform + + num_fields = table.get_modal_field_count() + + fields: list[str] = [] + if dialect.header.has_header_row and table.num_rows() > 0: + header_row = table.rows[0] + fields = [field.strip() for field in header_row] if header_row else [] + else: + fields = [f"field_{i + 1}" for i in range(num_fields)] + + total_bytes = len(data_after_preamble) + total_rows = table.num_rows() + avg_record_len = total_bytes / total_rows if total_rows > 0 else 0 + + return Metadata( + dialect=dialect, + avg_record_len=avg_record_len, + num_fields=num_fields, + fields=fields, + ) diff --git a/table/fairspec_table/utils/sniffer/sniffer_spec.py b/table/fairspec_table/utils/sniffer/sniffer_spec.py new file mode 100644 index 0000000..60dbaa1 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/sniffer_spec.py @@ -0,0 +1,294 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +from .metadata import Quote +from .sample import SampleSize, SampleSizeType +from .sniffer import Sniffer + + +class TestSniffBytes: + def test_detect_comma_delimited_csv(self): + csv = "id,name,age\n1,Alice,25\n2,Bob,30" + sniffer = 
Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 44 + assert metadata.dialect.header.has_header_row is True + assert metadata.num_fields == 3 + assert metadata.fields == ["id", "name", "age"] + + def test_detect_tab_delimited_tsv(self): + tsv = "id\tname\tage\n1\tAlice\t25\n2\tBob\t30" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(tsv.encode()) + + assert metadata.dialect.delimiter == 9 + assert metadata.dialect.header.has_header_row is True + assert metadata.num_fields == 3 + + def test_detect_semicolon_delimited(self): + csv = "id;name;age\n1;Alice;25\n2;Bob;30" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 59 + assert metadata.num_fields == 3 + + def test_detect_pipe_delimited(self): + csv = "id|name|age\n1|Alice|25\n2|Bob|30" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 124 + assert metadata.num_fields == 3 + + def test_detect_quoted_fields(self): + csv = 'id,name,description\n1,"Alice","She said, ""Hello"""\n2,"Bob","Normal text"' + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 44 + assert metadata.dialect.quote.char is not None + assert metadata.dialect.quote.char == 34 + + def test_detect_crlf(self): + csv = "id,name\r\n1,Alice\r\n2,Bob" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 44 + assert metadata.num_fields == 2 + + def test_detect_cr(self): + csv = "id,name\r1,Alice\r2,Bob" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 44 + assert metadata.num_fields == 2 + + def test_detect_no_header(self): + csv = "1,Alice,25\n2,Bob,30\n3,Charlie,35" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.header.has_header_row is False + assert 
metadata.fields == ["field_1", "field_2", "field_3"] + + def test_skip_comment_preamble(self): + csv = "# This is a comment\n# Another comment\nid,name\n1,Alice\n2,Bob" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.header.num_preamble_rows == 2 + assert metadata.fields == ["id", "name"] + + def test_detect_structural_preamble(self): + csv = "Report Title\nReport Date: 2024-01-01\nid,name\n1,Alice\n2,Bob" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.header.num_preamble_rows >= 1 + + def test_handle_utf8_bom(self): + bom = bytes([0xEF, 0xBB, 0xBF]) + csv = "id,name\n1,Alice".encode() + data = bom + csv + + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(data) + + assert metadata.dialect.delimiter == 44 + assert metadata.fields == ["id", "name"] + + def test_flexible_mode(self): + csv = "id,name\n1,Alice\n2,Bob,Extra\n3,Charlie" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.flexible is True + + def test_force_delimiter(self): + csv = "id;name;age\n1;Alice;25\n2;Bob;30" + sniffer = Sniffer().with_delimiter(59) + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.delimiter == 59 + + def test_force_quote(self): + csv = "id,'name','age'\n1,'Alice','25'" + sniffer = Sniffer().with_quote(Quote(char=39)) + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.dialect.quote.char is not None + assert metadata.dialect.quote.char == 39 + + def test_avg_record_len(self): + csv = "id,name\n1,Alice\n2,Bob" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.avg_record_len > 0 + + def test_empty_file(self): + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(b"") + + assert metadata.num_fields == 0 + + def test_single_line(self): + csv = "id,name,age" + sniffer = Sniffer() + metadata = sniffer.sniff_bytes(csv.encode()) + + assert metadata.num_fields == 3 
+ + +class TestSniffRows: + def test_detect_comma_with_header(self): + rows: list[list[object]] = [ + ["id", "name", "age"], + [1, "Alice", 25], + [2, "Bob", 30], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.delimiter == 44 + assert metadata.dialect.header.has_header_row is True + assert metadata.fields == ["id", "name", "age"] + assert metadata.num_fields == 3 + + def test_detect_no_header(self): + rows: list[list[object]] = [ + [1, "Alice", 25], + [2, "Bob", 30], + [3, "Charlie", 35], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.delimiter == 44 + assert metadata.dialect.header.has_header_row is False + assert metadata.fields == ["field_1", "field_2", "field_3"] + + def test_string_values_with_commas(self): + rows: list[list[object]] = [ + ["name", "city"], + ["Smith, John", "New York"], + ["Doe, Jane", "Los Angeles"], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.delimiter == 44 + assert metadata.fields == ["name", "city"] + + def test_string_values_with_quotes(self): + rows: list[list[object]] = [ + ["text", "author"], + ['He said "Hello"', "Alice"], + ['She said "Hi"', "Bob"], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.quote.char is not None + assert metadata.dialect.quote.char == 34 + + def test_null_to_empty_string(self): + rows: list[list[object]] = [ + ["id", "name", "optional"], + [1, "Alice", None], + [2, "Bob", None], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.fields == ["id", "name", "optional"] + + def test_numbers_and_booleans(self): + rows: list[list[object]] = [ + ["count", "price", "active"], + [42, 19.99, True], + [100, 5.5, False], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.num_fields == 3 + + def test_date_objects(self): + rows: list[list[object]] = [ + ["event", "timestamp"], + 
["Login", datetime(2024, 1, 1, 10, 0, 0, tzinfo=timezone.utc)], + ["Logout", datetime(2024, 1, 1, 11, 0, 0, tzinfo=timezone.utc)], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.fields == ["event", "timestamp"] + + def test_objects_and_arrays(self): + rows: list[list[object]] = [ + ["id", "metadata", "tags"], + [1, {"key": "value"}, ["a", "b"]], + [2, {"key": "other"}, ["c"]], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.num_fields == 3 + + def test_variable_lengths_flexible(self): + rows: list[list[object]] = [ + ["id", "name"], + [1, "Alice"], + [2, "Bob", "extra"], + [3], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.flexible is True + + def test_empty_array(self): + rows: list[list[object]] = [] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.num_fields == 0 + assert metadata.fields == [] + + def test_single_row(self): + rows: list[list[object]] = [[1, "Alice"]] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.num_fields == 2 + + def test_configured_sample_size(self): + rows: list[list[object]] = [[i, f"data{i}"] for i in range(1000)] + sniffer = Sniffer().with_sample_size( + SampleSize(type=SampleSizeType.RECORDS, count=10) + ) + metadata = sniffer.sniff_rows(rows) + + assert metadata.num_fields == 2 + + def test_preamble_rows_with_hash(self): + rows: list[list[object]] = [ + ["# Exported from Excel on 2024-01-01"], + ["# Data source: Sales Report"], + ["id", "product", "quantity"], + [1, "Widget", 100], + [2, "Gadget", 150], + ] + sniffer = Sniffer() + metadata = sniffer.sniff_rows(rows) + + assert metadata.dialect.header.num_preamble_rows == 2 + assert metadata.dialect.header.has_header_row is True + assert metadata.fields == ["id", "product", "quantity"] + assert metadata.num_fields == 3 diff --git a/table/fairspec_table/utils/sniffer/table.py 
b/table/fairspec_table/utils/sniffer/table.py new file mode 100644 index 0000000..7ca2bbe --- /dev/null +++ b/table/fairspec_table/utils/sniffer/table.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from .potential_dialects import PotentialDialect + + +class Table: + rows: list[list[str]] + field_counts: list[int] + _cached_modal_field_count: int | None + + def __init__(self, rows: list[list[str]], field_counts: list[int]) -> None: + self.rows = rows + self.field_counts = field_counts + self._cached_modal_field_count = None + + @classmethod + def parse(cls, data: bytes, dialect: PotentialDialect) -> Table: + text = data.decode("utf-8", errors="replace") + + rows: list[list[str]] = [] + field_counts: list[int] = [] + + delimiter_char = chr(dialect.delimiter) + quote_char = chr(dialect.quote.char) if dialect.quote.char is not None else None + + terminator_string = { + "CRLF": "\r\n", + "CR": "\r", + "LF": "\n", + }[dialect.line_terminator] + + lines = text.split(terminator_string) + + for line in lines: + if len(line) == 0: + continue + + if quote_char: + fields = _parse_quoted_line(line, delimiter_char, quote_char) + else: + fields = line.split(delimiter_char) + + rows.append(fields) + field_counts.append(len(fields)) + + return cls(rows, field_counts) + + def get_modal_field_count(self) -> int: + if self._cached_modal_field_count is not None: + return self._cached_modal_field_count + + if len(self.field_counts) == 0: + self._cached_modal_field_count = 0 + return 0 + + max_field_count = max(self.field_counts) + + if max_field_count <= 256: + frequency = [0] * (max_field_count + 1) + for count in self.field_counts: + frequency[count] += 1 + + max_freq = 0 + modal = 0 + for i in range(max_field_count + 1): + if frequency[i] > max_freq: + max_freq = frequency[i] + modal = i + + self._cached_modal_field_count = modal + return modal + else: + frequency: dict[int, int] = {} + for count in self.field_counts: + frequency[count] = frequency.get(count, 0) + 1 + + 
max_freq = 0 + modal = 0 + for count, freq in frequency.items(): + if freq > max_freq: + max_freq = freq + modal = count + + self._cached_modal_field_count = modal + return modal + + def is_uniform(self) -> bool: + if len(self.field_counts) == 0: + return True + + first = self.field_counts[0] + return all(count == first for count in self.field_counts) + + def num_rows(self) -> int: + return len(self.rows) + + +def _parse_quoted_line( + line: str, + delimiter: str, + quote: str, +) -> list[str]: + fields: list[str] = [] + current_field = "" + in_quotes = False + i = 0 + + while i < len(line): + char = line[i] + + if char == quote: + if in_quotes and i + 1 < len(line) and line[i + 1] == quote: + current_field += quote + i += 2 + else: + in_quotes = not in_quotes + i += 1 + elif char == delimiter and not in_quotes: + fields.append(current_field) + current_field = "" + i += 1 + else: + current_field += char + i += 1 + + fields.append(current_field) + return fields diff --git a/table/fairspec_table/utils/sniffer/table_spec.py b/table/fairspec_table/utils/sniffer/table_spec.py new file mode 100644 index 0000000..1b58b05 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/table_spec.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from .metadata import LineTerminator, Quote +from .potential_dialects import PotentialDialect +from .table import Table + + +class TestTable: + def test_parse_comma_delimited(self): + csv = "a,b,c\n1,2,3\n4,5,6" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.LF + ) + table = Table.parse(csv.encode(), dialect) + + assert table.rows == [ + ["a", "b", "c"], + ["1", "2", "3"], + ["4", "5", "6"], + ] + assert table.field_counts == [3, 3, 3] + + def test_parse_quoted_fields(self): + csv = 'a,"b,c",d\n1,"2,3",4' + dialect = PotentialDialect( + delimiter=44, quote=Quote(char=34), line_terminator=LineTerminator.LF + ) + table = Table.parse(csv.encode(), dialect) + + assert table.rows == [ + ["a", 
"b,c", "d"], + ["1", "2,3", "4"], + ] + + def test_escaped_quotes(self): + csv = 'a,"b""c",d' + dialect = PotentialDialect( + delimiter=44, quote=Quote(char=34), line_terminator=LineTerminator.LF + ) + table = Table.parse(csv.encode(), dialect) + + assert table.rows == [["a", 'b"c', "d"]] + + def test_modal_field_count(self): + rows = [ + ["a", "b", "c"], + ["1", "2", "3"], + ["4", "5"], + ["7", "8", "9"], + ] + field_counts = [3, 3, 2, 3] + table = Table(rows, field_counts) + + assert table.get_modal_field_count() == 3 + + def test_cache_modal_field_count(self): + rows = [["a", "b"], ["1", "2"]] + field_counts = [2, 2] + table = Table(rows, field_counts) + + first = table.get_modal_field_count() + second = table.get_modal_field_count() + + assert first == second + assert first == 2 + + def test_detect_uniform(self): + rows = [["a", "b"], ["1", "2"], ["3", "4"]] + field_counts = [2, 2, 2] + table = Table(rows, field_counts) + + assert table.is_uniform() is True + + def test_detect_non_uniform(self): + rows = [["a", "b"], ["1", "2", "3"], ["4", "5"]] + field_counts = [2, 3, 2] + table = Table(rows, field_counts) + + assert table.is_uniform() is False + + def test_num_rows(self): + rows = [["a"], ["b"], ["c"]] + field_counts = [1, 1, 1] + table = Table(rows, field_counts) + + assert table.num_rows() == 3 + + def test_empty_table(self): + table = Table([], []) + + assert table.num_rows() == 0 + assert table.get_modal_field_count() == 0 + assert table.is_uniform() is True + + def test_parse_crlf(self): + csv = "a,b\r\n1,2\r\n3,4" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.CRLF + ) + table = Table.parse(csv.encode(), dialect) + + assert len(table.rows) == 3 + + def test_parse_cr(self): + csv = "a,b\r1,2\r3,4" + dialect = PotentialDialect( + delimiter=44, quote=Quote(), line_terminator=LineTerminator.CR + ) + table = Table.parse(csv.encode(), dialect) + + assert len(table.rows) == 3 + + def 
test_modal_field_count_wide_tables(self): + rows = [["x"] * (260 if i % 2 == 0 else 250) for i in range(300)] + field_counts = [len(row) for row in rows] + table = Table(rows, field_counts) + + assert table.get_modal_field_count() == 260 diff --git a/table/fairspec_table/utils/sniffer/uniformity.py b/table/fairspec_table/utils/sniffer/uniformity.py new file mode 100644 index 0000000..f71a5e5 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/uniformity.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import math + + +def calculate_tau0(field_counts: list[int]) -> float: + if len(field_counts) == 0: + return 0 + + std_dev = _calculate_std_dev(field_counts) + return 1 / (1 + 2 * std_dev) + + +def calculate_tau1( + field_counts: list[int], + modal_count: int, +) -> float: + if len(field_counts) == 0: + return 0 + + range_score = _calculate_range_score(field_counts, modal_count) + transition_score = _calculate_transition_score(field_counts) + mode_score = _calculate_mode_score(field_counts, modal_count) + + return (range_score + transition_score + mode_score) / 3 + + +def _calculate_std_dev(counts: list[int]) -> float: + if len(counts) == 0: + return 0 + + mean = sum(counts) / len(counts) + variance = sum((count - mean) ** 2 for count in counts) / len(counts) + + return math.sqrt(variance) + + +def _calculate_range_score(counts: list[int], modal_count: int) -> float: + if len(counts) == 0: + return 0 + + min_val = min(counts) + max_val = max(counts) + range_val = max_val - min_val + + if range_val == 0: + return 1 + if modal_count <= 0: + return 0 + + normalized_range = range_val / modal_count + return 1 / (1 + normalized_range) + + +def _calculate_transition_score(counts: list[int]) -> float: + if len(counts) <= 1: + return 1 + + transitions = 0 + for i in range(1, len(counts)): + if counts[i] != counts[i - 1]: + transitions += 1 + + max_transitions = len(counts) - 1 + transition_rate = transitions / max_transitions + + return 1 - transition_rate + + 
+def _calculate_mode_score(counts: list[int], modal_count: int) -> float: + if len(counts) == 0: + return 0 + + modal_frequency = sum(1 for count in counts if count == modal_count) + mode_ratio = modal_frequency / len(counts) + + return mode_ratio diff --git a/table/fairspec_table/utils/sniffer/uniformity_spec.py b/table/fairspec_table/utils/sniffer/uniformity_spec.py new file mode 100644 index 0000000..77a5d29 --- /dev/null +++ b/table/fairspec_table/utils/sniffer/uniformity_spec.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from .uniformity import calculate_tau0, calculate_tau1 + + +class TestCalculateTau0: + def test_uniform_field_counts(self): + assert calculate_tau0([3, 3, 3, 3, 3]) == 1 + + def test_varying_field_counts(self): + tau0 = calculate_tau0([3, 4, 3, 5, 3]) + assert tau0 < 1 + assert tau0 > 0 + + def test_empty_array(self): + assert calculate_tau0([]) == 0 + + def test_penalize_high_variance(self): + tau0_low = calculate_tau0([3, 3, 3, 4, 3]) + tau0_high = calculate_tau0([1, 5, 2, 8, 3]) + assert tau0_low > tau0_high + + +class TestCalculateTau1: + def test_consistent_field_counts(self): + tau1 = calculate_tau1([3, 3, 3, 3, 3], 3) + assert tau1 > 0.9 + + def test_varying_field_counts(self): + tau1 = calculate_tau1([3, 4, 5, 3, 4], 3) + assert tau1 < 1 + assert tau1 > 0 + + def test_empty_array(self): + assert calculate_tau1([], 0) == 0 + + def test_penalize_frequent_transitions(self): + tau1_few = calculate_tau1([3, 3, 3, 4, 4, 4], 3) + tau1_many = calculate_tau1([3, 4, 3, 4, 3, 4], 3) + assert tau1_few > tau1_many + + def test_favor_higher_mode_dominance(self): + tau1_high = calculate_tau1([3, 3, 3, 3, 4], 3) + tau1_low = calculate_tau1([3, 3, 4, 4, 5], 3) + assert tau1_high > tau1_low + + def test_penalize_wide_range(self): + tau1_narrow = calculate_tau1([3, 3, 4, 4, 3], 3) + tau1_wide = calculate_tau1([1, 3, 7, 3, 2], 3) + assert tau1_narrow > tau1_wide + + +class TestCombinedTau: + def test_uniform_data_scores_highly(self): + 
field_counts = [5, 5, 5, 5, 5] + tau0 = calculate_tau0(field_counts) + tau1 = calculate_tau1(field_counts, 5) + assert tau0 > 0.9 + assert tau1 > 0.9 + + def test_chaotic_data_scores_lowly(self): + field_counts = [1, 5, 2, 8, 3, 9, 1, 7] + tau0 = calculate_tau0(field_counts) + tau1 = calculate_tau1(field_counts, 1) + assert tau0 < 0.5 + assert tau1 < 0.5 diff --git a/table/pyproject.toml b/table/pyproject.toml new file mode 100644 index 0000000..39abcc3 --- /dev/null +++ b/table/pyproject.toml @@ -0,0 +1,53 @@ +[project] +name = "fairspec-table" +version = "0.0.0-dev" +description = "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +keywords = [ + "arrow", + "csv", + "data", + "dataframe", + "fair", + "fairspec", + "inline", + "json", + "jsonschema", + "ods", + "parquet", + "polars", + "quality", + "table", + "tableschema", + "typescript", + "validation", + "xlsx" +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "fairspec-dataset", + "fairspec-metadata", + "isodate>=0.7", + "odfpy>=1.4", + "openpyxl>=3.1", + "polars>=1.0", + "shapely>=2.0", + "tzdata>=2024.1" +] + +[[project.authors]] +name = "Evgeny Karev" + +[project.urls] +homepage = "https://github.com/fairspec/fairspec-python" +repository = "https://github.com/fairspec/fairspec-python" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/terminal/README.md b/terminal/README.md new file mode 100644 index 0000000..3fc89c7 --- /dev/null +++ b/terminal/README.md @@ -0,0 +1,3 @@ +# fairspec-terminal + +Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. 
It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org). diff --git a/terminal/fairspec_terminal/__init__.py b/terminal/fairspec_terminal/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/actions/__init__.py b/terminal/fairspec_terminal/actions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/actions/error/__init__.py b/terminal/fairspec_terminal/actions/error/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/actions/error/render.py b/terminal/fairspec_terminal/actions/error/render.py new file mode 100644 index 0000000..5419378 --- /dev/null +++ b/terminal/fairspec_terminal/actions/error/render.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from fairspec_metadata import FairspecError + + +def render_error(error: FairspecError) -> str: + match error.type: + case "cell/type": + return _render_cell_type_error(error) + case "cell/missing": + return _render_cell_missing_error(error) + case "cell/minimum": + return _render_cell_minimum_error(error) + case "cell/maximum": + return _render_cell_maximum_error(error) + case "cell/exclusiveMinimum": + return _render_cell_exclusive_minimum_error(error) + case "cell/exclusiveMaximum": + return _render_cell_exclusive_maximum_error(error) + case "cell/multipleOf": + return _render_cell_multiple_of_error(error) + case "cell/minLength": + return _render_cell_min_length_error(error) + case "cell/maxLength": + return _render_cell_max_length_error(error) + case "cell/pattern": + return _render_cell_pattern_error(error) + case "cell/unique": + return _render_cell_unique_error(error) + case "cell/const": + return _render_cell_const_error(error) + case "cell/enum": + return 
_render_cell_enum_error(error) + case "cell/json": + return _render_cell_json_error(error) + case "cell/minItems": + return _render_cell_min_items_error(error) + case "cell/maxItems": + return _render_cell_max_items_error(error) + case "column/missing": + return _render_column_missing_error(error) + case "column/type": + return _render_column_type_error(error) + case "data": + return _render_data_error(error) + case "file/textual": + return _render_textual_error(error) + case "file/integrity": + return _render_integrity_error(error) + case "foreignKey": + return _render_foreign_key_error(error) + case "metadata": + return _render_metadata_error(error) + case "row/primaryKey": + return _render_row_primary_key_error(error) + case "row/uniqueKey": + return _render_row_unique_key_error(error) + case "resource/missing": + return _render_resource_missing_error(error) + case "resource/type": + return _render_resource_type_error(error) + case _: + return str(error) + + +def _b(value: object) -> str: + return f"[bold]{value}[/bold]" + + +def _in_resource(resource_name: str | None) -> str: + return f" in resource {_b(resource_name)}" if resource_name else "" + + +def _render_cell_type_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is not {_b(error.columnType)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_missing_error(error: object) -> str: + return f"A cell in column {_b(error.columnName)} of row {_b(error.rowNumber)} is missing{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_minimum_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is less than {_b(error.minimum)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_maximum_error(error: object) -> str: + return f"Value of the cell 
{_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is more than {_b(error.maximum)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_exclusive_minimum_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is less or equal to {_b(error.minimum)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_exclusive_maximum_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is greater or equal to {_b(error.maximum)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_multiple_of_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is not a multiple of {_b(error.multipleOf)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_min_length_error(error: object) -> str: + return f"Length of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is less than {_b(error.minLength)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_max_length_error(error: object) -> str: + return f"Length of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is more than {_b(error.maxLength)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_pattern_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} does not match the {_b(error.pattern)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_unique_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is not 
unique{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_const_error(error: object) -> str: + const_val = getattr(error, "const", None) + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is not allowed value {_b(const_val)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_enum_error(error: object) -> str: + enum_values = ", ".join(_b(v) for v in error.enum) # type: ignore[union-attr] + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} is not in the allowed values {enum_values}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_json_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} violates JSON schema at {_b(error.jsonPointer)}: {error.message}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_min_items_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} has less than {_b(error.minItems)} items{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_cell_max_items_error(error: object) -> str: + return f"Value of the cell {_b(error.cell)} in column {_b(error.columnName)} of row {_b(error.rowNumber)} has more than {_b(error.maxItems)} items{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_column_missing_error(error: object) -> str: + return f"Required column {_b(error.columnName)} is missing{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_column_type_error(error: object) -> str: + return f"Column {_b(error.columnName)} is expected to be {_b(error.expectedColumnType)}, but it is {_b(error.actualColumnType)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + 
+def _render_data_error(error: object) -> str: + return f"Data error at {_b(error.jsonPointer)}: {error.message}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_textual_error(error: object) -> str: + actual_encoding = getattr(error, "actualEncoding", None) + encoding_text = _b(actual_encoding) if actual_encoding else "binary" + return f"File is expected to be textual with utf-8 encoding but it is {encoding_text}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_integrity_error(error: object) -> str: + return f"File hash {_b(error.hashType)} is expected to be {_b(error.expectedHash)}, but it is {_b(error.actualHash)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_foreign_key_error(error: object) -> str: + cells = ", ".join(_b(c) for c in error.cells) # type: ignore[union-attr] + return f"Foreign key constraint violated as cells {cells} do not reference existing values{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_metadata_error(error: object) -> str: + return f"{error.message} at {_b(error.jsonPointer)}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_row_primary_key_error(error: object) -> str: + column_names = ", ".join(_b(c) for c in error.columnNames) # type: ignore[union-attr] + return f"Row {_b(error.rowNumber)} violates primary key constraint on columns {column_names}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_row_unique_key_error(error: object) -> str: + column_names = ", ".join(_b(c) for c in error.columnNames) # type: ignore[union-attr] + return f"Row {_b(error.rowNumber)} violates unique key constraint on columns {column_names}{_in_resource(error.resourceName)}" # type: ignore[union-attr] + + +def _render_resource_missing_error(error: object) -> str: + in_ref = _in_resource(getattr(error, "referencingResourceName", None)) + return f"Resource {_b(error.resourceName)} 
is missing, but expected{in_ref}" # type: ignore[union-attr] + + +def _render_resource_type_error(error: object) -> str: + in_ref = _in_resource(getattr(error, "referencingResourceName", None)) + return f"Resource {_b(error.resourceName)} is expected to be {_b(error.expectedResourceType)}{in_ref}" # type: ignore[union-attr] diff --git a/terminal/fairspec_terminal/commands/__init__.py b/terminal/fairspec_terminal/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/commands/data/__init__.py b/terminal/fairspec_terminal/commands/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/commands/data/infer_schema.py b/terminal/fairspec_terminal/commands/data/infer_schema.py new file mode 100644 index 0000000..c1f63d7 --- /dev/null +++ b/terminal/fairspec_terminal/commands/data/infer_schema.py @@ -0,0 +1,27 @@ +from fairspec_library import infer_data_schema +from fairspec_metadata import Resource + +from fairspec_terminal.params import Debug, Json, RequiredFilePath, Silent +from fairspec_terminal.program import data_program +from fairspec_terminal.session import Session + + +@data_program.command(name="infer-schema") +def infer_schema( + path: RequiredFilePath, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Infer a Data Schema.""" + session = Session(silent=silent, debug=debug, json=json) + + def _infer() -> object: + data_schema = infer_data_schema(Resource(data=path)) + if not data_schema: + raise ValueError("Could not infer data schema") + return data_schema + + data_schema = session.task("Inferring data schema", _infer) + + session.render_data_result(data_schema) diff --git a/terminal/fairspec_terminal/commands/data/validate.py b/terminal/fairspec_terminal/commands/data/validate.py new file mode 100644 index 0000000..e413bcc --- /dev/null +++ b/terminal/fairspec_terminal/commands/data/validate.py @@ -0,0 +1,32 @@ +from 
fairspec_library import validate_data +from fairspec_metadata import Report, load_data_schema + +from fairspec_terminal.params import DataSchemaPath, Debug, Json, RequiredFilePath, Silent +from fairspec_terminal.program import data_program +from fairspec_terminal.session import Session + + +@data_program.command() +def validate( + path: RequiredFilePath, + schema: DataSchemaPath, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Validate a JSON Data from a local or remote path.""" + session = Session(silent=silent, debug=debug, json=json) + + def _load_schema() -> object: + if not schema: + raise ValueError("No data schema provided") + return load_data_schema(schema) + + data_schema = session.task("Loading data schema", _load_schema) + + def _validate() -> Report: + return validate_data({"data": path, "dataSchema": data_schema}) # type: ignore[arg-type] + + report = session.task("Validating data", _validate) + + session.render_report_result(report) diff --git a/terminal/fairspec_terminal/commands/data/validate_schema.py b/terminal/fairspec_terminal/commands/data/validate_schema.py new file mode 100644 index 0000000..2e050df --- /dev/null +++ b/terminal/fairspec_terminal/commands/data/validate_schema.py @@ -0,0 +1,24 @@ +from fairspec_metadata import validate_data_schema + +from fairspec_terminal.params import Debug, Json, RequiredFilePath, Silent +from fairspec_terminal.program import data_program +from fairspec_terminal.session import Session + + +@data_program.command(name="validate-schema") +def validate_schema( + path: RequiredFilePath, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Validate a Data Schema.""" + session = Session(silent=silent, debug=debug, json=json) + + def _validate() -> object: + result = validate_data_schema(path) + return result + + report = session.task("Validating data schema", _validate) + + session.render_report_result(report) # type: ignore[arg-type] diff 
--git a/terminal/fairspec_terminal/commands/dataset/__init__.py b/terminal/fairspec_terminal/commands/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/commands/dataset/copy.py b/terminal/fairspec_terminal/commands/dataset/copy.py new file mode 100644 index 0000000..d9db679 --- /dev/null +++ b/terminal/fairspec_terminal/commands/dataset/copy.py @@ -0,0 +1,28 @@ +from fairspec_library import load_dataset, save_dataset + +from fairspec_terminal.params import Debug, Json, RequiredPath, Silent, ToPathRequired +from fairspec_terminal.program import dataset_program +from fairspec_terminal.session import Session + + +@dataset_program.command() +def copy( + path: RequiredPath, + to_path: ToPathRequired, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Copy a local or remote dataset to a local folder.""" + session = Session(silent=silent, debug=debug, json=json) + + def _copy() -> None: + dataset = load_dataset(path) + save_dataset(dataset, target=to_path) # type: ignore[arg-type] + + session.task("Copy dataset", _copy) + + session.render_text_result( + f"Copied dataset from [bold]{path}[/bold] to [bold]{to_path}[/bold]", + status="success", + ) diff --git a/terminal/fairspec_terminal/commands/dataset/infer.py b/terminal/fairspec_terminal/commands/dataset/infer.py new file mode 100644 index 0000000..b7b9934 --- /dev/null +++ b/terminal/fairspec_terminal/commands/dataset/infer.py @@ -0,0 +1,62 @@ +from fairspec_library import infer_dataset +from fairspec_metadata import Dataset + +from fairspec_terminal.params import ( + ArrayType, + ColumnTypes, + CommaDecimal, + Confidence, + DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + FalseValues, + GroupChar, + Json, + KeepStrings, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + SampleRows, + TimeFormat, + TrueValues, + VariadicPaths, +) +from fairspec_terminal.program import dataset_program +from 
fairspec_terminal.session import Session + + +@dataset_program.command() +def infer( + paths: VariadicPaths, + json: Json = False, + debug: Debug = False, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Infer a dataset from local or remote file paths.""" + session = Session(debug=debug, json=json) + + def _infer() -> Dataset: + dataset = Dataset(resources=[{"data": data} for data in paths]) # type: ignore[list-item] + return infer_dataset(dataset) + + dataset = session.task("Inferring dataset", _infer) + + session.render_data_result(dataset.model_dump(exclude_none=True)) diff --git a/terminal/fairspec_terminal/commands/dataset/list_.py b/terminal/fairspec_terminal/commands/dataset/list_.py new file mode 100644 index 0000000..4bc76f5 --- /dev/null +++ b/terminal/fairspec_terminal/commands/dataset/list_.py @@ -0,0 +1,31 @@ +from fairspec_library import load_dataset +from fairspec_metadata import infer_resource_name + +from fairspec_terminal.params import Debug, Json, RequiredPath +from fairspec_terminal.program import dataset_program +from fairspec_terminal.session import Session + + +@dataset_program.command(name="list") +def list_( + path: RequiredPath, + json: Json = False, + debug: Debug = False, +) -> None: + """List Dataset resources.""" + session = Session(debug=debug, json=json) + + def _load() -> object: + dataset = load_dataset(path) + if not dataset: + raise ValueError("Could not load 
dataset") + return dataset + + dataset = session.task("Loading dataset", _load) + + resource_names = [ + resource.name or infer_resource_name(resource) + for resource in getattr(dataset, "resources", None) or [] + ] + + session.render_data_result(resource_names) diff --git a/terminal/fairspec_terminal/commands/dataset/script.py b/terminal/fairspec_terminal/commands/dataset/script.py new file mode 100644 index 0000000..6000bb8 --- /dev/null +++ b/terminal/fairspec_terminal/commands/dataset/script.py @@ -0,0 +1,32 @@ +import code + +import fairspec_library +from fairspec_library import load_dataset + +from fairspec_terminal.params import Debug, RequiredPath +from fairspec_terminal.program import dataset_program +from fairspec_terminal.session import Session + + +@dataset_program.command() +def script( + path: RequiredPath, + debug: Debug = False, +) -> None: + """Script a dataset descriptor.""" + session = Session(debug=debug) + + def _load() -> object: + return load_dataset(path) + + dataset = session.task("Loading dataset", _load) + + session.render_text( + "[dim]`fairspec` and `dataset` variables are available in the session[/dim]", + status="warning", + ) + + code.interact( + banner="", + local={"fairspec": fairspec_library, "dataset": dataset}, + ) diff --git a/terminal/fairspec_terminal/commands/dataset/validate.py b/terminal/fairspec_terminal/commands/dataset/validate.py new file mode 100644 index 0000000..9222418 --- /dev/null +++ b/terminal/fairspec_terminal/commands/dataset/validate.py @@ -0,0 +1,23 @@ +from fairspec_library import validate_dataset +from fairspec_metadata import Report + +from fairspec_terminal.params import Debug, Json, RequiredPath +from fairspec_terminal.program import dataset_program +from fairspec_terminal.session import Session + + +@dataset_program.command() +def validate( + path: RequiredPath, + json: Json = False, + debug: Debug = False, +) -> None: + """Validate a dataset from a local or remote path.""" + session = 
Session(debug=debug, json=json) + + def _validate() -> Report: + return validate_dataset(path) + + report = session.task("Validating dataset", _validate) + + session.render_report_result(report) diff --git a/terminal/fairspec_terminal/commands/file/__init__.py b/terminal/fairspec_terminal/commands/file/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/commands/file/copy.py b/terminal/fairspec_terminal/commands/file/copy.py new file mode 100644 index 0000000..b0926e8 --- /dev/null +++ b/terminal/fairspec_terminal/commands/file/copy.py @@ -0,0 +1,34 @@ +from fairspec_dataset import copy_file + +from fairspec_terminal.helpers.file import select_file +from fairspec_terminal.params import Debug, FromDataset, FromResource, Json, OptionalPath, Silent, ToPathRequired +from fairspec_terminal.program import file_program +from fairspec_terminal.session import Session + + +@file_program.command() +def copy( + path: OptionalPath = None, + to_path: ToPathRequired = ..., # type: ignore[assignment] + dataset: FromDataset = None, + resource: FromResource = None, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Copy a local or remote file to a local path.""" + session = Session(silent=silent, debug=debug, json=json) + + if not path: + path = select_file(session, dataset=dataset, resource=resource) + + def _copy() -> None: + assert path + copy_file(source_path=path, target_path=to_path) + + session.task("Copying file", _copy) + + session.render_text_result( + f"Copied file from [bold]{path}[/bold] to [bold]{to_path}[/bold]", + status="success", + ) diff --git a/terminal/fairspec_terminal/commands/file/describe.py b/terminal/fairspec_terminal/commands/file/describe.py new file mode 100644 index 0000000..190572e --- /dev/null +++ b/terminal/fairspec_terminal/commands/file/describe.py @@ -0,0 +1,31 @@ +from fairspec_dataset import describe_file + +from fairspec_terminal.helpers.file import select_file 
+from fairspec_terminal.params import Debug, FromDataset, FromResource, HashType, Json, OptionalPath, Silent +from fairspec_terminal.program import file_program +from fairspec_terminal.session import Session + + +@file_program.command() +def describe( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + hash_type: HashType = "sha256", + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Show stats for a local or remote file.""" + session = Session(silent=silent, debug=debug, json=json) + + if not path: + path = select_file(session, dataset=dataset, resource=resource) + + def _describe() -> object: + assert path + return describe_file(path, hash_type=hash_type) + + stats = session.task("Describing file", _describe) + + session.render_data_result(stats) diff --git a/terminal/fairspec_terminal/commands/file/validate.py b/terminal/fairspec_terminal/commands/file/validate.py new file mode 100644 index 0000000..9afcf64 --- /dev/null +++ b/terminal/fairspec_terminal/commands/file/validate.py @@ -0,0 +1,31 @@ +from fairspec_dataset import validate_file +from fairspec_metadata import Report, Resource + +from fairspec_terminal.params import Debug, Hash_, HashType, Json, RequiredFilePath, Silent +from fairspec_terminal.program import file_program +from fairspec_terminal.session import Session + + +@file_program.command() +def validate( + path: RequiredFilePath, + hash: Hash_ = None, + hash_type: HashType = "sha256", + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Validate a file from a local or remote path.""" + session = Session(silent=silent, debug=debug, json=json) + + def _validate() -> Report: + integrity = ( + {"hash": hash, "type": hash_type or "sha256"} + if hash + else None + ) + return validate_file(Resource(data=path, integrity=integrity)) # type: ignore[arg-type] + + report = session.task("Validating file", _validate) + + 
session.render_report_result(report) diff --git a/terminal/fairspec_terminal/commands/file_dialect/__init__.py b/terminal/fairspec_terminal/commands/file_dialect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/commands/file_dialect/infer.py b/terminal/fairspec_terminal/commands/file_dialect/infer.py new file mode 100644 index 0000000..b2084a4 --- /dev/null +++ b/terminal/fairspec_terminal/commands/file_dialect/infer.py @@ -0,0 +1,49 @@ +from fairspec_library import infer_file_dialect +from fairspec_metadata import Resource + +from fairspec_terminal.helpers.file import select_file +from fairspec_terminal.params import ( + Debug, + FromDataset, + FromResource, + Json, + OptionalPath, + SampleBytes, + Silent, +) +from fairspec_terminal.session import Session + + +def create_infer_dialect_command(program: object) -> None: + import typer + + assert isinstance(program, typer.Typer) + + @program.command(name="infer-dialect") + def infer_dialect( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + sample_bytes: SampleBytes = None, + silent: Silent = False, + debug: Debug = False, + json: Json = False, + ) -> None: + """Infer the dialect of a file.""" + session = Session(silent=silent, debug=debug, json=json) + + if not path: + path = select_file(session, dataset=dataset, resource=resource) + + def _infer() -> object: + kwargs = {} + if sample_bytes is not None: + kwargs["sampleBytes"] = sample_bytes + file_dialect = infer_file_dialect(Resource(data=path), **kwargs) + if not file_dialect: + raise ValueError("Could not infer dialect") + return file_dialect + + file_dialect = session.task("Inferring dialect", _infer) + + session.render_data_result(file_dialect) diff --git a/terminal/fairspec_terminal/commands/table/__init__.py b/terminal/fairspec_terminal/commands/table/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/terminal/fairspec_terminal/commands/table/describe.py b/terminal/fairspec_terminal/commands/table/describe.py new file mode 100644 index 0000000..ac16b6b --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/describe.py @@ -0,0 +1,129 @@ +import polars as pl +from fairspec_library import load_table +from fairspec_metadata import Resource + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + GroupChar, + HeaderJoin, + HeaderRows, + Json, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TableSchemaPath, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command() +def describe( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + json: Json = False, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + schema: TableSchemaPath = None, + sample_rows: 
SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Show stats for a table from a local or remote path.""" + session = Session(debug=debug, json=json) + + file_dialect = ( + dialect or create_file_dialect_from_path_and_options( + path, + format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect, tableSchema=schema) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _load() -> pl.LazyFrame: + table = load_table(res) + if table is None: + raise ValueError("Could not load table") + return table + + table = session.task("Loading table", _load) + + def _describe() -> pl.DataFrame: + frame = table.collect() + assert isinstance(frame, pl.DataFrame) + return frame.describe().rename({"statistic": "#"}) + + stats = session.task("Calculating stats", _describe) + + session.render_frame_result(stats) diff --git a/terminal/fairspec_terminal/commands/table/infer_schema.py b/terminal/fairspec_terminal/commands/table/infer_schema.py new file mode 100644 index 0000000..8217afa --- 
/dev/null +++ b/terminal/fairspec_terminal/commands/table/infer_schema.py @@ -0,0 +1,119 @@ +from fairspec_library import infer_table_schema +from fairspec_metadata import Resource, TableSchema + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + GroupChar, + HeaderJoin, + HeaderRows, + Json, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command(name="infer-schema") +def infer_schema( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + json: Json = False, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes 
= None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Infer a table schema from a table.""" + session = Session(debug=debug, json=json) + + file_dialect = ( + dialect or create_file_dialect_from_path_and_options( + path, + format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _infer() -> TableSchema: + table_schema = infer_table_schema(res) + if not table_schema: + raise ValueError("Could not infer table schema") + return table_schema + + table_schema = session.task("Inferring schema", _infer) + + session.render_data_result(table_schema.model_dump(exclude_none=True)) diff --git a/terminal/fairspec_terminal/commands/table/preview.py b/terminal/fairspec_terminal/commands/table/preview.py new file mode 100644 index 0000000..b79957e --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/preview.py @@ -0,0 +1,129 @@ +import polars as pl +from fairspec_library import load_table +from fairspec_metadata import Resource + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from 
fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + GroupChar, + HeaderJoin, + HeaderRows, + Json, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TableSchemaPath, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command() +def preview( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + json: Json = False, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + schema: TableSchemaPath = None, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + 
list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Preview a table from a local or remote path.""" + session = Session(debug=debug, json=json) + + file_dialect = ( + dialect or create_file_dialect_from_path_and_options( + path, + format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect, tableSchema=schema) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _load() -> pl.LazyFrame: + table = load_table(res, previewBytes=10_000) + if table is None: + raise ValueError("Could not load table") + return table + + table = session.task("Loading table preview", _load) + + def _collect() -> pl.DataFrame: + result = table.limit(100).collect() + assert isinstance(result, pl.DataFrame) + return result + + frame = session.task("Collecting data", _collect) + + session.render_frame_result(frame) diff --git a/terminal/fairspec_terminal/commands/table/query.py b/terminal/fairspec_terminal/commands/table/query.py new file mode 100644 index 0000000..0837801 --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/query.py @@ -0,0 +1,139 @@ +import polars as pl +from fairspec_library import load_table +from fairspec_metadata import Resource +from fairspec_table import query_table + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + 
DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + GroupChar, + HeaderJoin, + HeaderRows, + Json, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + Query, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TableSchemaPath, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command() +def query( + path: OptionalPath = None, + sql_query: Query = None, + dataset: FromDataset = None, + resource: FromResource = None, + json: Json = False, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + schema: TableSchemaPath = None, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Query a table from 
a local or remote path.""" + session = Session(debug=debug, json=json) + + file_dialect = ( + dialect or create_file_dialect_from_path_and_options( + path, + format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect, tableSchema=schema) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _load() -> pl.LazyFrame: + table = load_table(res) + if table is None: + raise ValueError("Could not load table") + return table + + table = session.task("Loading table", _load) + + if sql_query: + def _query() -> pl.LazyFrame: + assert sql_query + return query_table(table, sql_query) + + table = session.task("Executing query", _query) + + def _collect() -> pl.DataFrame: + result = table.collect() + assert isinstance(result, pl.DataFrame) + return result + + frame = session.task("Collecting data", _collect) + + session.render_frame_result(frame) diff --git a/terminal/fairspec_terminal/commands/table/render_schema.py b/terminal/fairspec_terminal/commands/table/render_schema.py new file mode 100644 index 0000000..377fb76 --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/render_schema.py @@ -0,0 +1,58 @@ +from typing import Annotated + +import typer +from fairspec_library import render_table_schema_as +from fairspec_metadata import RenderTableSchemaOptions, TableSchema, resolve_table_schema + +from fairspec_terminal.params import Debug, Json, RequiredFilePath, Silent, ToPath +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + +ToFormat = Annotated[str, 
typer.Option("--to-format", help="target schema format")] + + +@table_program.command(name="render-schema") +def render_schema( + path: RequiredFilePath, + to_format: ToFormat, + to_path: ToPath = None, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Render a Table Schema as HTML or Markdown.""" + session = Session(silent=silent, debug=debug, json=json) + + if not to_format: + raise ValueError("--to-format must be specified") + + def _load() -> TableSchema | None: + return resolve_table_schema(path) + + table_schema = session.task("Loading table schema", _load) + + if not table_schema: + raise ValueError("Could not load table schema") + + def _render() -> str | None: + return render_table_schema_as(table_schema, RenderTableSchemaOptions(format=to_format)) + + rendered = session.task("Rendering table schema", _render) + + def _save() -> bool: + if not to_path: + return False + with open(to_path, "w", encoding="utf-8") as f: + f.write(rendered or "") + return True + + is_saved = session.task("Saving rendered schema", _save) + + if not is_saved: + session.render_text_result(rendered or "") + return + + session.render_text_result( + f"Saved rendered schema to [bold]{to_path}[/bold]", + status="success", + ) diff --git a/terminal/fairspec_terminal/commands/table/script.py b/terminal/fairspec_terminal/commands/table/script.py new file mode 100644 index 0000000..7cee5fa --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/script.py @@ -0,0 +1,132 @@ +import code + +import polars as pl + +import fairspec_library +from fairspec_library import load_table +from fairspec_metadata import Resource + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + DateFormat, + 
DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + GroupChar, + HeaderJoin, + HeaderRows, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TableSchemaPath, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command() +def script( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + schema: TableSchemaPath = None, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Start a scripting session for a table.""" + session = Session(debug=debug) + + file_dialect = ( + 
dialect or create_file_dialect_from_path_and_options( + path, + format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect, tableSchema=schema) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _load() -> pl.LazyFrame: + table = load_table(res, denormalized=True) + if table is None: + raise ValueError("Could not load table") + return table + + table = session.task("Loading table", _load) + + session.render_text( + "[dim]`fairspec` and `table` variables are available in the session[/dim]", + status="warning", + ) + + code.interact( + banner="", + local={"fairspec": fairspec_library, "table": table}, + ) diff --git a/terminal/fairspec_terminal/commands/table/validate.py b/terminal/fairspec_terminal/commands/table/validate.py new file mode 100644 index 0000000..bf922f8 --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/validate.py @@ -0,0 +1,140 @@ +import polars as pl +from fairspec_library import load_table +from fairspec_metadata import Resource, Report, create_report, resolve_table_schema +from fairspec_table import infer_table_schema_from_table, inspect_table + +from fairspec_terminal.helpers.file_dialect import create_file_dialect_from_path_and_options +from fairspec_terminal.helpers.resource import select_resource +from fairspec_terminal.params import ( + ArrayType, + ColumnNamesParam, + ColumnTypes, + CommaDecimal, + CommentPrefix, + CommentRows, + Confidence, + DateFormat, + DatetimeFormat, + Debug, + DecimalChar, + Delimiter, + Dialect, + FalseValues, + Format, + FromDataset, + FromResource, + 
GroupChar, + HeaderJoin, + HeaderRows, + Json, + JsonPointer, + KeepStrings, + LineTerminator, + ListDelimiter, + ListItemType, + MissingValues, + MonthFirst, + NullSequence, + OptionalPath, + QuoteChar, + RowType, + SampleRows, + SheetName, + SheetNumber, + TableName, + TableSchemaPath, + TimeFormat, + TrueValues, +) +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command() +def validate( + path: OptionalPath = None, + dataset: FromDataset = None, + resource: FromResource = None, + json: Json = False, + debug: Debug = False, + dialect: Dialect = None, + format: Format = None, + delimiter: Delimiter = None, + line_terminator: LineTerminator = None, + quote_char: QuoteChar = None, + null_sequence: NullSequence = None, + header_rows: HeaderRows = None, + header_join: HeaderJoin = None, + comment_rows: CommentRows = None, + comment_prefix: CommentPrefix = None, + column_names: ColumnNamesParam = None, + json_pointer: JsonPointer = None, + row_type: RowType = None, + sheet_number: SheetNumber = None, + sheet_name: SheetName = None, + table_name: TableName = None, + schema: TableSchemaPath = None, + sample_rows: SampleRows = None, + confidence: Confidence = None, + comma_decimal: CommaDecimal = False, + month_first: MonthFirst = False, + keep_strings: KeepStrings = False, + column_types: ColumnTypes = None, + missing_values: MissingValues = None, + decimal_char: DecimalChar = None, + group_char: GroupChar = None, + true_values: TrueValues = None, + false_values: FalseValues = None, + datetime_format: DatetimeFormat = None, + date_format: DateFormat = None, + time_format: TimeFormat = None, + array_type: ArrayType = None, + list_delimiter: ListDelimiter = None, + list_item_type: ListItemType = None, +) -> None: + """Validate a table from a local or remote path.""" + session = Session(debug=debug, json=json) + + file_dialect = ( + dialect or create_file_dialect_from_path_and_options( + path, + 
format=format, delimiter=delimiter, line_terminator=line_terminator, + quote_char=quote_char, null_sequence=null_sequence, header_rows=header_rows, + header_join=header_join, comment_rows=comment_rows, comment_prefix=comment_prefix, + column_names=column_names, json_pointer=json_pointer, row_type=row_type, + sheet_number=sheet_number, sheet_name=sheet_name, table_name=table_name, + ) + ) if path else None + + res: Resource = ( + Resource(data=path, fileDialect=file_dialect, tableSchema=schema) + if path + else select_resource(session, dataset=dataset, resource=resource) + ) + + def _load() -> pl.LazyFrame: + table = load_table(res, denormalized=True) + if table is None: + raise ValueError("Could not load table") + return table + + table = session.task("Loading table", _load) + + table_schema = session.task( + "Loading schema", + lambda: resolve_table_schema(res.tableSchema), + ) + + if not table_schema: + table_schema = session.task( + "Inferring schema", + lambda: infer_table_schema_from_table(table), + ) + + def _validate() -> Report: + errors = list(inspect_table(table, table_schema=table_schema)) + return create_report(errors) # type: ignore[arg-type] + + report = session.task("Validating table", _validate) + + session.render_report_result(report) diff --git a/terminal/fairspec_terminal/commands/table/validate_schema.py b/terminal/fairspec_terminal/commands/table/validate_schema.py new file mode 100644 index 0000000..3cb177a --- /dev/null +++ b/terminal/fairspec_terminal/commands/table/validate_schema.py @@ -0,0 +1,24 @@ +from fairspec_metadata import validate_table_schema + +from fairspec_terminal.params import Debug, Json, RequiredFilePath, Silent +from fairspec_terminal.program import table_program +from fairspec_terminal.session import Session + + +@table_program.command(name="validate-schema") +def validate_schema( + path: RequiredFilePath, + silent: Silent = False, + debug: Debug = False, + json: Json = False, +) -> None: + """Validate a Table Schema.""" 
+ session = Session(silent=silent, debug=debug, json=json) + + def _validate() -> object: + result = validate_table_schema(path) + return result + + report = session.task("Validating table schema", _validate) + + session.render_report_result(report) # type: ignore[arg-type] diff --git a/terminal/fairspec_terminal/helpers/__init__.py b/terminal/fairspec_terminal/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terminal/fairspec_terminal/helpers/file.py b/terminal/fairspec_terminal/helpers/file.py new file mode 100644 index 0000000..e3c2509 --- /dev/null +++ b/terminal/fairspec_terminal/helpers/file.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import Resource, get_data_first_path + +from .resource import select_resource + +if TYPE_CHECKING: + from fairspec_terminal.session import Session + + +def select_file( + session: Session, + *, + dataset: str | None = None, + resource: str | None = None, +) -> str: + selected = select_resource(session, dataset=dataset, resource=resource) + + path = session.task("Selecting file", lambda: _get_first_path(selected)) + + return path + + +def _get_first_path(resource: Resource) -> str: + first_path = get_data_first_path(resource) + if not first_path: + raise ValueError("Resource does not have files") + return first_path diff --git a/terminal/fairspec_terminal/helpers/file_dialect.py b/terminal/fairspec_terminal/helpers/file_dialect.py new file mode 100644 index 0000000..1ebd8da --- /dev/null +++ b/terminal/fairspec_terminal/helpers/file_dialect.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_metadata import Resource, infer_file_dialect_format + +if TYPE_CHECKING: + from fairspec_metadata import FileDialect + + +def create_file_dialect_from_path_and_options( + path: str, + *, + format: str | None = None, + delimiter: str | None = None, + line_terminator: str | None = None, 
+ quote_char: str | None = None, + null_sequence: str | None = None, + header_rows: str | None = None, + header_join: str | None = None, + comment_rows: str | None = None, + comment_prefix: str | None = None, + column_names: str | None = None, + json_pointer: str | None = None, + row_type: str | None = None, + sheet_number: int | None = None, + sheet_name: str | None = None, + table_name: str | None = None, +) -> FileDialect | None: + resolved_format = format or infer_file_dialect_format(Resource(data=path)) + + parsed_header_rows = _parse_header_rows(header_rows) if header_rows else None + parsed_comment_rows = _parse_int_list(comment_rows) if comment_rows else None + parsed_column_names = column_names.split(",") if column_names else None + + if resolved_format in ("csv", "tsv"): + file_dialect: dict[str, object] = {"format": resolved_format} + + if line_terminator: + file_dialect["lineTerminator"] = line_terminator + if null_sequence: + file_dialect["nullSequence"] = null_sequence + if parsed_header_rows is not None: + file_dialect["headerRows"] = parsed_header_rows + if header_join: + file_dialect["headerJoin"] = header_join + if parsed_comment_rows: + file_dialect["commentRows"] = parsed_comment_rows + if comment_prefix: + file_dialect["commentPrefix"] = comment_prefix + if parsed_column_names: + file_dialect["columnNames"] = parsed_column_names + + if resolved_format == "csv": + if delimiter: + file_dialect["delimiter"] = delimiter + if quote_char: + file_dialect["quoteChar"] = quote_char + + return file_dialect # type: ignore[return-value] + + if resolved_format in ("xlsx", "ods"): + file_dialect = {"format": resolved_format} + + if sheet_number is not None: + file_dialect["sheetNumber"] = sheet_number + if sheet_name: + file_dialect["sheetName"] = sheet_name + if parsed_header_rows is not None: + file_dialect["headerRows"] = parsed_header_rows + if header_join: + file_dialect["headerJoin"] = header_join + if parsed_comment_rows: + file_dialect["commentRows"] 
= parsed_comment_rows + if comment_prefix: + file_dialect["commentPrefix"] = comment_prefix + + return file_dialect # type: ignore[return-value] + + if resolved_format in ("json", "jsonl"): + file_dialect = {"format": resolved_format} + + if parsed_header_rows is not None: + file_dialect["headerRows"] = parsed_header_rows + if header_join: + file_dialect["headerJoin"] = header_join + if parsed_comment_rows: + file_dialect["commentRows"] = parsed_comment_rows + if comment_prefix: + file_dialect["commentPrefix"] = comment_prefix + if row_type: + file_dialect["rowType"] = row_type + + if resolved_format == "json": + if json_pointer: + file_dialect["jsonPointer"] = json_pointer + + return file_dialect # type: ignore[return-value] + + if resolved_format == "sqlite": + file_dialect = {"format": resolved_format} + + if table_name: + file_dialect["tableName"] = table_name + + return file_dialect # type: ignore[return-value] + + return None + + +def _parse_header_rows(value: str) -> list[int] | bool: + if value == "false": + return False + return [int(x) for x in value.split(",")] + + +def _parse_int_list(value: str) -> list[int]: + return [int(x) for x in value.split(",")] diff --git a/terminal/fairspec_terminal/helpers/resource.py b/terminal/fairspec_terminal/helpers/resource.py new file mode 100644 index 0000000..de70d29 --- /dev/null +++ b/terminal/fairspec_terminal/helpers/resource.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fairspec_library import infer_resource_name, load_dataset + +if TYPE_CHECKING: + from fairspec_metadata import Resource + + from fairspec_terminal.session import Session + + +def select_resource( + session: Session, + *, + dataset: str | None = None, + resource: str | None = None, +) -> Resource: + loaded = session.task("Loading dataset", lambda: _load_dataset(dataset)) + + selected = session.task("Selecting resource", lambda: _find_resource(loaded, resource)) + + return selected + + +def 
_load_dataset(dataset_path: str | None) -> object: + if not dataset_path: + raise ValueError("Please provide a path argument or a dataset option") + + result = load_dataset(dataset_path) + if not result: + raise ValueError("Could not load dataset") + + return result + + +def _find_resource(dataset: object, resource_name: str | None) -> Resource: + if not resource_name: + raise ValueError("Please provide a resource option") + + for res in getattr(dataset, "resources", None) or []: + name = res.name or infer_resource_name(res) + if resource_name == name: + return res + + raise ValueError(f'Resource "{resource_name}" not found') diff --git a/terminal/fairspec_terminal/main.py b/terminal/fairspec_terminal/main.py new file mode 100644 index 0000000..1324595 --- /dev/null +++ b/terminal/fairspec_terminal/main.py @@ -0,0 +1,6 @@ +from fairspec_terminal.program import program, register_commands + + +def main() -> None: + register_commands() + program() diff --git a/terminal/fairspec_terminal/params/__init__.py b/terminal/fairspec_terminal/params/__init__.py new file mode 100644 index 0000000..f9cdfc6 --- /dev/null +++ b/terminal/fairspec_terminal/params/__init__.py @@ -0,0 +1,55 @@ +from .data_schema import DataSchemaPath as DataSchemaPath +from .dataset import FromDataset as FromDataset +from .dataset import FromResource as FromResource +from .dataset import ToArchive as ToArchive +from .dataset import ToFolder as ToFolder +from .dataset import WithRemote as WithRemote +from .file import Bytes_ as Bytes_ +from .file import Hash_ as Hash_ +from .file import HashType as HashType +from .file import SampleBytes as SampleBytes +from .file_dialect import ColumnNamesParam as ColumnNamesParam +from .file_dialect import CommentPrefix as CommentPrefix +from .file_dialect import CommentRows as CommentRows +from .file_dialect import Delimiter as Delimiter +from .file_dialect import Dialect as Dialect +from .file_dialect import Format as Format +from .file_dialect import HeaderJoin as 
HeaderJoin +from .file_dialect import HeaderRows as HeaderRows +from .file_dialect import JsonPointer as JsonPointer +from .file_dialect import LineTerminator as LineTerminator +from .file_dialect import NullSequence as NullSequence +from .file_dialect import QuoteChar as QuoteChar +from .file_dialect import RowType as RowType +from .file_dialect import SheetName as SheetName +from .file_dialect import SheetNumber as SheetNumber +from .file_dialect import TableName as TableName +from .path import OptionalPath as OptionalPath +from .path import RequiredFilePath as RequiredFilePath +from .path import RequiredPath as RequiredPath +from .path import ToPath as ToPath +from .path import ToPathRequired as ToPathRequired +from .path import VariadicPaths as VariadicPaths +from .session import Debug as Debug +from .session import Json as Json +from .session import Silent as Silent +from .table import Overwrite as Overwrite +from .table import Query as Query +from .table_schema import ArrayType as ArrayType +from .table_schema import ColumnTypes as ColumnTypes +from .table_schema import CommaDecimal as CommaDecimal +from .table_schema import Confidence as Confidence +from .table_schema import DateFormat as DateFormat +from .table_schema import DatetimeFormat as DatetimeFormat +from .table_schema import DecimalChar as DecimalChar +from .table_schema import FalseValues as FalseValues +from .table_schema import GroupChar as GroupChar +from .table_schema import KeepStrings as KeepStrings +from .table_schema import ListDelimiter as ListDelimiter +from .table_schema import ListItemType as ListItemType +from .table_schema import MissingValues as MissingValues +from .table_schema import MonthFirst as MonthFirst +from .table_schema import SampleRows as SampleRows +from .table_schema import TableSchemaPath as TableSchemaPath +from .table_schema import TimeFormat as TimeFormat +from .table_schema import TrueValues as TrueValues diff --git 
a/terminal/fairspec_terminal/params/data_schema.py b/terminal/fairspec_terminal/params/data_schema.py new file mode 100644 index 0000000..09b072a --- /dev/null +++ b/terminal/fairspec_terminal/params/data_schema.py @@ -0,0 +1,5 @@ +from typing import Annotated + +import typer + +DataSchemaPath = Annotated[str, typer.Option("--schema", help="path to a data schema descriptor (JSON Schema)")] diff --git a/terminal/fairspec_terminal/params/dataset.py b/terminal/fairspec_terminal/params/dataset.py new file mode 100644 index 0000000..0830e0e --- /dev/null +++ b/terminal/fairspec_terminal/params/dataset.py @@ -0,0 +1,9 @@ +from typing import Annotated + +import typer + +WithRemote = Annotated[bool, typer.Option("--with-remote", help="include remote resources")] +FromDataset = Annotated[str | None, typer.Option("-d", "--dataset", help="dataset to select resource from")] +FromResource = Annotated[str | None, typer.Option("-r", "--resource", help="resource in provided dataset")] +ToFolder = Annotated[str, typer.Option("--to-folder", help="a local output folder path")] +ToArchive = Annotated[str, typer.Option("--to-archive", help="a local output zip file path")] diff --git a/terminal/fairspec_terminal/params/file.py b/terminal/fairspec_terminal/params/file.py new file mode 100644 index 0000000..3eab758 --- /dev/null +++ b/terminal/fairspec_terminal/params/file.py @@ -0,0 +1,8 @@ +from typing import Annotated + +import typer + +HashType = Annotated[str, typer.Option("--hash-type", help="hash type")] +Bytes_ = Annotated[str | None, typer.Option("--bytes", help="expected file size in bytes")] +SampleBytes = Annotated[int | None, typer.Option("--sample-bytes", help="sample size in bytes")] +Hash_ = Annotated[str | None, typer.Option("--hash", help="expected file hash calculated with the specified hash type")] diff --git a/terminal/fairspec_terminal/params/file_dialect.py b/terminal/fairspec_terminal/params/file_dialect.py new file mode 100644 index 0000000..3798eaa --- /dev/null 
+++ b/terminal/fairspec_terminal/params/file_dialect.py @@ -0,0 +1,20 @@ +from typing import Annotated + +import typer + +Dialect = Annotated[str | None, typer.Option("--dialect", help="path to a Dialect descriptor")] +Format = Annotated[str | None, typer.Option("--format", help="format type (csv, json, xlsx, etc)")] +Delimiter = Annotated[str | None, typer.Option("--delimiter", help="character used to separate fields in the data")] +LineTerminator = Annotated[str | None, typer.Option("--line-terminator", help="character sequence used to terminate rows")] +QuoteChar = Annotated[str | None, typer.Option("--quote-char", help="character used to quote fields")] +NullSequence = Annotated[str | None, typer.Option("--null-sequence", help="character sequence representing null or missing values")] +HeaderRows = Annotated[str | None, typer.Option("--header-rows", help="comma-separated row numbers (1-indexed) for headers, or 'false' to disable")] +HeaderJoin = Annotated[str | None, typer.Option("--header-join", help="character used to join multi-line headers")] +CommentRows = Annotated[str | None, typer.Option("--comment-rows", help="comma-separated row numbers (1-indexed) to exclude from data")] +CommentPrefix = Annotated[str | None, typer.Option("--comment-prefix", help="character sequence denoting the start of a comment line")] +ColumnNamesParam = Annotated[str | None, typer.Option("--column-names", help="comma-separated list of column names")] +JsonPointer = Annotated[str | None, typer.Option("--json-pointer", help="JSON pointer to the data array within the JSON document")] +RowType = Annotated[str | None, typer.Option("--row-type", help="the type of each row in the data")] +SheetNumber = Annotated[int | None, typer.Option("--sheet-number", help="for spreadsheet data, the sheet number to read (0-indexed)")] +SheetName = Annotated[str | None, typer.Option("--sheet-name", help="for spreadsheet data, the sheet name to read")] +TableName = Annotated[str | None, 
typer.Option("--table-name", help="for database sources, the table name to read")] diff --git a/terminal/fairspec_terminal/params/path.py b/terminal/fairspec_terminal/params/path.py new file mode 100644 index 0000000..7c7f8e7 --- /dev/null +++ b/terminal/fairspec_terminal/params/path.py @@ -0,0 +1,10 @@ +from typing import Annotated + +import typer + +OptionalPath = Annotated[str | None, typer.Argument(help="local or remote path")] +RequiredPath = Annotated[str, typer.Argument(help="local or remote path")] +RequiredFilePath = Annotated[str, typer.Argument(help="local or remote path to the file")] +VariadicPaths = Annotated[list[str], typer.Argument(help="local paths to files")] +ToPath = Annotated[str | None, typer.Option("--to-path", help="a local output path")] +ToPathRequired = Annotated[str, typer.Option("--to-path", help="a local output path")] diff --git a/terminal/fairspec_terminal/params/session.py b/terminal/fairspec_terminal/params/session.py new file mode 100644 index 0000000..fd733b0 --- /dev/null +++ b/terminal/fairspec_terminal/params/session.py @@ -0,0 +1,7 @@ +from typing import Annotated + +import typer + +Json = Annotated[bool, typer.Option("--json", help="output as JSON")] +Debug = Annotated[bool, typer.Option("--debug", help="Enable debug mode to print exception details to stderr")] +Silent = Annotated[bool, typer.Option("--silent", help="suppress all output except errors")] diff --git a/terminal/fairspec_terminal/params/table.py b/terminal/fairspec_terminal/params/table.py new file mode 100644 index 0000000..44ecdc3 --- /dev/null +++ b/terminal/fairspec_terminal/params/table.py @@ -0,0 +1,6 @@ +from typing import Annotated + +import typer + +Query = Annotated[str | None, typer.Argument(help="a SQL query to execute against a table (use `self` to refer to the table)")] +Overwrite = Annotated[bool, typer.Option("--overwrite", help="whether to overwrite a file if it already exists")] diff --git a/terminal/fairspec_terminal/params/table_schema.py 
b/terminal/fairspec_terminal/params/table_schema.py new file mode 100644 index 0000000..102261d --- /dev/null +++ b/terminal/fairspec_terminal/params/table_schema.py @@ -0,0 +1,22 @@ +from typing import Annotated + +import typer + +TableSchemaPath = Annotated[str | None, typer.Option("--schema", help="path to a Table Schema descriptor")] +SampleRows = Annotated[int | None, typer.Option("--sample-rows", help="number of rows to sample for schema inference")] +Confidence = Annotated[float | None, typer.Option("--confidence", help="confidence threshold for schema inference")] +CommaDecimal = Annotated[bool, typer.Option("--comma-decimal", help="use comma as decimal separator in schema inference")] +MonthFirst = Annotated[bool, typer.Option("--month-first", help="interpret dates as month-first in schema inference")] +KeepStrings = Annotated[bool, typer.Option("--keep-strings", help="keep fields as strings instead of inferring types")] +ColumnTypes = Annotated[str | None, typer.Option("--column-types", help="a list of comma-separated column name:type pairs to use for the schema")] +MissingValues = Annotated[str | None, typer.Option("--missing-values", help="comma-separated values to treat as missing")] +DecimalChar = Annotated[str | None, typer.Option("--decimal-char", help="character to use as decimal separator")] +GroupChar = Annotated[str | None, typer.Option("--group-char", help="character to use for digit grouping")] +TrueValues = Annotated[str | None, typer.Option("--true-values", help="values to treat as true")] +FalseValues = Annotated[str | None, typer.Option("--false-values", help="values to treat as false")] +DatetimeFormat = Annotated[str | None, typer.Option("--datetime-format", help="datetime format pattern")] +DateFormat = Annotated[str | None, typer.Option("--date-format", help="date format pattern")] +TimeFormat = Annotated[str | None, typer.Option("--time-format", help="time format pattern")] +ArrayType = Annotated[str | None, 
typer.Option("--array-type", help="array type (array or list)")] +ListDelimiter = Annotated[str | None, typer.Option("--list-delimiter", help="delimiter for list values")] +ListItemType = Annotated[str | None, typer.Option("--list-item-type", help="type of items in lists")] diff --git a/terminal/fairspec_terminal/program.py b/terminal/fairspec_terminal/program.py new file mode 100644 index 0000000..f22ca59 --- /dev/null +++ b/terminal/fairspec_terminal/program.py @@ -0,0 +1,39 @@ +import typer + +program = typer.Typer(name="fairspec", help="Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames") +dataset_program = typer.Typer(help="Dataset related commands") +table_program = typer.Typer(help="Table related commands") +data_program = typer.Typer(help="Data related commands") +file_program = typer.Typer(help="File related commands") + +program.add_typer(dataset_program, name="dataset") +program.add_typer(table_program, name="table") +program.add_typer(data_program, name="data") +program.add_typer(file_program, name="file") + + +def register_commands() -> None: + import fairspec_terminal.commands.dataset.copy # noqa: F401 + import fairspec_terminal.commands.dataset.infer # noqa: F401 + import fairspec_terminal.commands.dataset.list_ # noqa: F401 + import fairspec_terminal.commands.dataset.script # noqa: F401 + import fairspec_terminal.commands.dataset.validate # noqa: F401 + import fairspec_terminal.commands.table.describe # noqa: F401 + import fairspec_terminal.commands.table.preview # noqa: F401 + import fairspec_terminal.commands.table.query # noqa: F401 + import fairspec_terminal.commands.table.script # noqa: F401 + import fairspec_terminal.commands.table.validate # noqa: F401 + import fairspec_terminal.commands.table.infer_schema # noqa: F401 + import fairspec_terminal.commands.table.render_schema # noqa: F401 + import fairspec_terminal.commands.table.validate_schema # noqa: F401 + import 
fairspec_terminal.commands.data.validate # noqa: F401 + import fairspec_terminal.commands.data.infer_schema # noqa: F401 + import fairspec_terminal.commands.data.validate_schema # noqa: F401 + import fairspec_terminal.commands.file.describe # noqa: F401 + import fairspec_terminal.commands.file.copy # noqa: F401 + import fairspec_terminal.commands.file.validate # noqa: F401 + from fairspec_terminal.commands.file_dialect.infer import create_infer_dialect_command + + create_infer_dialect_command(table_program) + create_infer_dialect_command(data_program) + create_infer_dialect_command(file_program) diff --git a/terminal/fairspec_terminal/session.py b/terminal/fairspec_terminal/session.py new file mode 100644 index 0000000..4cfe100 --- /dev/null +++ b/terminal/fairspec_terminal/session.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import atexit +import json +import sys +from typing import TYPE_CHECKING, Callable, TypeVar + +import typer +from rich.syntax import Syntax +from rich.console import Console +from rich.status import Status + +from fairspec_metadata import FairspecException + +from .actions.error.render import render_error + +if TYPE_CHECKING: + from fairspec_metadata import Report + +T = TypeVar("T") + + +class Session: + silent: bool + debug: bool + json: bool + + def __init__(self, *, silent: bool = False, debug: bool = False, json: bool = False) -> None: + self.silent = silent + self.debug = debug + self.json = json + self._console = Console() + + if not self.silent and not self.json: + sys.stdout.write("\n") + atexit.register(lambda: sys.stdout.write("\n")) + + def render_text(self, text: str, *, status: str | None = None) -> None: + if self.silent or self.json: + return + + if not status: + self._console.print(text) + return + + self._console.print(f"{_render_status(status)} {text}") + + def render_text_result(self, text: str, *, status: str | None = None) -> None: + if self.silent: + return + + if self.json: + 
sys.stdout.write(json.dumps({"result": text}, indent=2)) + sys.stdout.write("\n") + return + + if not status: + self._console.print(text) + return + + self._console.print(f"{_render_status(status)} {text}") + + def render_data_result(self, data: object) -> None: + if self.silent: + return + + if self.json: + sys.stdout.write(json.dumps(data, indent=2, default=_json_default)) + sys.stdout.write("\n") + return + + text = json.dumps(data, indent=2, default=_json_default) + self._console.print(Syntax(text, "json", theme="monokai")) + + def render_frame_result(self, frame: object) -> None: + if self.silent: + return + + if self.json: + import polars as pl + + assert isinstance(frame, pl.DataFrame) + sys.stdout.write(json.dumps(frame.to_dicts(), indent=2, default=str)) + sys.stdout.write("\n") + return + + sys.stdout.write(str(frame)) + sys.stdout.write("\n") + + def render_report_result(self, report: Report) -> None: + if self.silent: + return + + if self.json: + sys.stdout.write(json.dumps(report.model_dump(exclude_none=True), indent=2, default=str)) + sys.stdout.write("\n") + return + + if report.valid: + self.render_text("Validation passed", status="success") + return + + for error in report.errors: + self.render_text(render_error(error), status="error") + + def task(self, title: str, func: Callable[[], T]) -> T: + if self.json or self.silent: + try: + return func() + except FairspecException as exception: + if self.debug: + raise + if self.json: + sys.stdout.write(json.dumps({"error": str(exception)}, indent=2)) + sys.stdout.write("\n") + if exception.report: + self.render_report_result(exception.report) + raise typer.Exit(1) from exception + except Exception as exception: + if self.debug: + raise + if self.json: + sys.stdout.write(json.dumps({"error": str(exception)}, indent=2)) + sys.stdout.write("\n") + raise typer.Exit(1) from exception + + with Status(title, console=self._console): + try: + return func() + except FairspecException as exception: + if self.debug: 
+ raise + if exception.report: + sys.stdout.write("\n") + self.render_report_result(exception.report) + raise typer.Exit(1) from exception + except Exception as exception: + if self.debug: + raise + self._console.print(f"[red]\u2716[/red] {title}: {exception}") + raise typer.Exit(1) from exception + + +def _render_status(status: str) -> str: + if status == "success": + return "[green]\u2714[/green]" + if status == "warning": + return "[yellow]\u26A0[/yellow]" + if status == "error": + return "[red]\u2716[/red]" + return "" + + +def _json_default(obj: object) -> object: + if hasattr(obj, "model_dump"): + return obj.model_dump(exclude_none=True) # type: ignore[union-attr] + return str(obj) diff --git a/terminal/pyproject.toml b/terminal/pyproject.toml new file mode 100644 index 0000000..46e380f --- /dev/null +++ b/terminal/pyproject.toml @@ -0,0 +1,50 @@ +[project] +name = "fairspec-terminal" +version = "0.0.0-dev" +description = "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames" +readme = "README.md" +requires-python = ">=3.12" +license = "MIT" +keywords = [ + "arrow", + "csv", + "data", + "dataframe", + "fair", + "fairspec", + "inline", + "json", + "jsonschema", + "ods", + "parquet", + "polars", + "quality", + "table", + "tableschema", + "typescript", + "validation", + "xlsx" +] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "fairspec-library", + "typer>=0.15", +] + +[project.scripts] +fairspec = "fairspec_terminal.main:main" + +[[project.authors]] +name = "Evgeny Karev" + +[project.urls] +homepage = "https://github.com/fairspec/fairspec-python" +repository = "https://github.com/fairspec/fairspec-python" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/uv.lock b/uv.lock index ba574a9..3a707a2 100644 --- a/uv.lock +++ 
b/uv.lock @@ -4,8 +4,22 @@ requires-python = ">=3.12" [manifest] members = [ + "fairspec", + "fairspec-dataset", + "fairspec-library", "fairspec-metadata", "fairspec-python", + "fairspec-table", + "fairspec-terminal", +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] [[package]] @@ -17,6 +31,114 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + +[[package]] +name = "certifi" 
+version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, +] + +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", 
size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, +] + +[[package]] +name = "click-option-group" +version = "0.5.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/ff/d291d66595b30b83d1cb9e314b2c9be7cfc7327d4a0d40a15da2416ea97b/click_option_group-0.5.9.tar.gz", hash = "sha256:f94ed2bc4cf69052e0f29592bd1e771a1789bd7bfc482dd0bc482134aff95823", size = 22222, upload-time = "2025-10-09T09:38:01.474Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/75/45/54bb2d8d4138964a94bef6e9afe48b0be4705ba66ac442ae7d8a8dc4ffef/click_option_group-0.5.9-py3-none-any.whl", hash = "sha256:ad2599248bd373e2e19bec5407967c3eec1d0d4fc4a5e77b08a0481e75991080", size = 11553, upload-time = "2025-10-09T09:38:00.066Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -26,17 +148,116 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + +[[package]] +name = "deprecated" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = 
"sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, +] + +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + +[[package]] +name = "dotty-dict" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/ab/88d67f02024700b48cd8232579ad1316aa9df2272c63049c27cc094229d6/dotty_dict-1.3.1.tar.gz", hash = "sha256:4b016e03b8ae265539757a53eba24b9bfda506fb94fbce0bee843c6f05541a15", size = 7699, upload-time = "2022-07-09T18:50:57.727Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/91/e0d457ee03ec33d79ee2cd8d212debb1bc21dfb99728ae35efdb5832dc22/dotty_dict-1.3.1-py3-none-any.whl", hash = "sha256:5022d234d9922f13aa711b4950372a06a6d64cb6d6db9ba43d0ba133ebfce31f", size = 7014, upload-time = "2022-07-09T18:50:55.058Z" }, +] + +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + +[[package]] +name = "fairspec" +version = "0.0.0.dev0" +source = { editable = "fairspec" } +dependencies = [ + { name = "fairspec-library" }, + { name = "fairspec-terminal" }, +] + +[package.metadata] +requires-dist = [ + { name = "fairspec-library", editable = "library" }, + { name = "fairspec-terminal", editable = "terminal" }, +] + +[[package]] +name = "fairspec-dataset" +version = "0.0.0.dev0" +source = { editable = "dataset" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "fairspec-metadata" }, +] + +[package.metadata] +requires-dist = [ + { name = "charset-normalizer", specifier = ">=3.0" }, + { name = "fairspec-metadata", editable = "metadata" }, +] + +[[package]] +name = "fairspec-library" +version = "0.0.0.dev0" +source = { editable = "library" } +dependencies = [ + { name = "fairspec-dataset" }, + { name = "fairspec-metadata" }, + { name = "fairspec-table" }, + { name = "genson" }, +] + +[package.metadata] +requires-dist = [ + { name = "fairspec-dataset", editable = "dataset" }, + { name = "fairspec-metadata", editable = "metadata" }, + { name = "fairspec-table", editable = "table" }, + { name = "genson", specifier = ">=1.3" }, +] + [[package]] name = "fairspec-metadata" version = "0.0.0.dev0" source = { editable = "metadata" } dependencies = [ + { name = "jsonschema" }, { name = "pydantic" }, { name = "typing-extensions" }, ] [package.metadata] requires-dist = [ + { name = "jsonschema", specifier = ">=4.23" }, { name = "pydantic", specifier = ">=2.12" }, { name = "typing-extensions", specifier = ">=4.15" }, ] @@ -48,7 +269,10 @@ source = { virtual = "." 
} [package.dev-dependencies] dev = [ + { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-recording" }, + { name = "python-semantic-release" }, { name = "ruff" }, { name = "taskipy" }, { name = "ty" }, @@ -58,10 +282,124 @@ dev = [ [package.metadata.requires-dev] dev = [ - { name = "pytest" }, - { name = "ruff" }, - { name = "taskipy" }, - { name = "ty" }, + { name = "pre-commit", specifier = "==4.2.0" }, + { name = "pytest", specifier = "==9.0.2" }, + { name = "pytest-recording", specifier = "==0.13.4" }, + { name = "python-semantic-release", specifier = "==10.5.3" }, + { name = "ruff", specifier = "==0.15.0" }, + { name = "taskipy", specifier = "==1.14.1" }, + { name = "ty", specifier = "==0.0.15" }, +] + +[[package]] +name = "fairspec-table" +version = "0.0.0.dev0" +source = { editable = "table" } +dependencies = [ + { name = "fairspec-dataset" }, + { name = "fairspec-metadata" }, + { name = "isodate" }, + { name = "odfpy" }, + { name = "openpyxl" }, + { name = "polars" }, + { name = "shapely" }, + { name = "tzdata" }, +] + +[package.metadata] +requires-dist = [ + { name = "fairspec-dataset", editable = "dataset" }, + { name = "fairspec-metadata", editable = "metadata" }, + { name = "isodate", specifier = ">=0.7" }, + { name = "odfpy", specifier = ">=1.4" }, + { name = "openpyxl", specifier = ">=3.1" }, + { name = "polars", specifier = ">=1.0" }, + { name = "shapely", specifier = ">=2.0" }, + { name = "tzdata", specifier = ">=2024.1" }, +] + +[[package]] +name = "fairspec-terminal" +version = "0.0.0.dev0" +source = { editable = "terminal" } +dependencies = [ + { name = "fairspec-library" }, + { name = "typer" }, +] + +[package.metadata] +requires-dist = [ + { name = "fairspec-library", editable = "library" }, + { name = "typer", specifier = ">=0.15" }, +] + +[[package]] +name = "filelock" +version = "3.20.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, +] + +[[package]] +name = "genson" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/cf/2303c8ad276dcf5ee2ad6cf69c4338fd86ef0f471a5207b069adf7a393cf/genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37", size = 34919, upload-time = "2024-05-15T22:08:49.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/5c/e226de133afd8bb267ec27eead9ae3d784b95b39a287ed404caab39a5f50/genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7", size = 21470, upload-time = "2024-05-15T22:08:47.056Z" }, +] + +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = 
"2025-01-02T07:20:43.624Z" }, +] + +[[package]] +name = "gitpython" +version = "3.1.46" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, +] + +[[package]] +name = "identify" +version = "2.6.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload-time = "2026-01-12T18:58:58.201Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-resources" +version = "6.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693, upload-time = "2025-01-03T18:51:56.698Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, ] [[package]] @@ -73,6 +411,138 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "isodate" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705, upload-time = "2024-10-08T23:04:11.5Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320, 
upload-time = "2024-10-08T23:04:09.501Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = 
"sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url 
= "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "mslex" version = "1.3.0" @@ -82,6 +552,97 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/f2/66bd65ca0139675a0d7b18f0bada6e12b51a984e41a76dbe44761bf1b3ee/mslex-1.3.0-py3-none-any.whl", hash = "sha256:c7074b347201b3466fc077c5692fbce9b5f62a63a51f537a53fbbd02eff2eea4", size = 7820, upload-time = "2024-10-16T13:16:17.566Z" }, ] +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = 
"2026-01-31T23:13:10.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/6e/6f394c9c77668153e14d4da83bcc247beb5952f6ead7699a1a2992613bea/numpy-2.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", size = 16667963, upload-time = "2026-01-31T23:10:52.147Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/55483431f2b2fd015ae6ed4fe62288823ce908437ed49db5a03d15151678/numpy-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", size = 14693571, upload-time = "2026-01-31T23:10:54.789Z" }, + { url = "https://files.pythonhosted.org/packages/2f/20/18026832b1845cdc82248208dd929ca14c9d8f2bac391f67440707fff27c/numpy-2.4.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", size = 5203469, upload-time = "2026-01-31T23:10:57.343Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/2eb97c8a77daaba34eaa3fa7241a14ac5f51c46a6bd5911361b644c4a1e2/numpy-2.4.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", size = 6550820, upload-time = "2026-01-31T23:10:59.429Z" }, + { url = "https://files.pythonhosted.org/packages/b1/91/b97fdfd12dc75b02c44e26c6638241cc004d4079a0321a69c62f51470c4c/numpy-2.4.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", size = 15663067, upload-time = "2026-01-31T23:11:01.291Z" }, + { url = "https://files.pythonhosted.org/packages/f5/c6/a18e59f3f0b8071cc85cbc8d80cd02d68aa9710170b2553a117203d46936/numpy-2.4.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", size = 16619782, upload-time = "2026-01-31T23:11:03.669Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/83/9751502164601a79e18847309f5ceec0b1446d7b6aa12305759b72cf98b2/numpy-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", size = 17013128, upload-time = "2026-01-31T23:11:05.913Z" }, + { url = "https://files.pythonhosted.org/packages/61/c4/c4066322256ec740acc1c8923a10047818691d2f8aec254798f3dd90f5f2/numpy-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", size = 18345324, upload-time = "2026-01-31T23:11:08.248Z" }, + { url = "https://files.pythonhosted.org/packages/ab/af/6157aa6da728fa4525a755bfad486ae7e3f76d4c1864138003eb84328497/numpy-2.4.2-cp312-cp312-win32.whl", hash = "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", size = 5960282, upload-time = "2026-01-31T23:11:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/92/0f/7ceaaeaacb40567071e94dbf2c9480c0ae453d5bb4f52bea3892c39dc83c/numpy-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", size = 12314210, upload-time = "2026-01-31T23:11:12.176Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a3/56c5c604fae6dd40fa2ed3040d005fca97e91bd320d232ac9931d77ba13c/numpy-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", size = 10220171, upload-time = "2026-01-31T23:11:14.684Z" }, + { url = "https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" }, + { url = "https://files.pythonhosted.org/packages/09/f0/817d03a03f93ba9c6c8993de509277d84e69f9453601915e4a69554102a1/numpy-2.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", size = 14688322, upload-time = "2026-01-31T23:11:19.883Z" }, + { url = "https://files.pythonhosted.org/packages/da/b4/f805ab79293c728b9a99438775ce51885fd4f31b76178767cfc718701a39/numpy-2.4.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", size = 5198157, upload-time = "2026-01-31T23:11:22.375Z" }, + { url = "https://files.pythonhosted.org/packages/74/09/826e4289844eccdcd64aac27d13b0fd3f32039915dd5b9ba01baae1f436c/numpy-2.4.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", size = 6546330, upload-time = "2026-01-31T23:11:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/19/fb/cbfdbfa3057a10aea5422c558ac57538e6acc87ec1669e666d32ac198da7/numpy-2.4.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", size = 15660968, upload-time = "2026-01-31T23:11:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/04/dc/46066ce18d01645541f0186877377b9371b8fa8017fa8262002b4ef22612/numpy-2.4.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", size = 16607311, upload-time = "2026-01-31T23:11:28.117Z" }, + { url = "https://files.pythonhosted.org/packages/14/d9/4b5adfc39a43fa6bf918c6d544bc60c05236cc2f6339847fc5b35e6cb5b0/numpy-2.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", size = 17012850, upload-time = "2026-01-31T23:11:30.888Z" }, + { url = "https://files.pythonhosted.org/packages/b7/20/adb6e6adde6d0130046e6fdfb7675cc62bc2f6b7b02239a09eb58435753d/numpy-2.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", size = 
18334210, upload-time = "2026-01-31T23:11:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/78/0e/0a73b3dff26803a8c02baa76398015ea2a5434d9b8265a7898a6028c1591/numpy-2.4.2-cp313-cp313-win32.whl", hash = "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", size = 5958199, upload-time = "2026-01-31T23:11:35.385Z" }, + { url = "https://files.pythonhosted.org/packages/43/bc/6352f343522fcb2c04dbaf94cb30cca6fd32c1a750c06ad6231b4293708c/numpy-2.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", size = 12310848, upload-time = "2026-01-31T23:11:38.001Z" }, + { url = "https://files.pythonhosted.org/packages/6e/8d/6da186483e308da5da1cc6918ce913dcfe14ffde98e710bfeff2a6158d4e/numpy-2.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", size = 10221082, upload-time = "2026-01-31T23:11:40.392Z" }, + { url = "https://files.pythonhosted.org/packages/25/a1/9510aa43555b44781968935c7548a8926274f815de42ad3997e9e83680dd/numpy-2.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", size = 14815866, upload-time = "2026-01-31T23:11:42.495Z" }, + { url = "https://files.pythonhosted.org/packages/36/30/6bbb5e76631a5ae46e7923dd16ca9d3f1c93cfa8d4ed79a129814a9d8db3/numpy-2.4.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", size = 5325631, upload-time = "2026-01-31T23:11:44.7Z" }, + { url = "https://files.pythonhosted.org/packages/46/00/3a490938800c1923b567b3a15cd17896e68052e2145d8662aaf3e1ffc58f/numpy-2.4.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", size = 6646254, upload-time = "2026-01-31T23:11:46.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/e9/fac0890149898a9b609caa5af7455a948b544746e4b8fe7c212c8edd71f8/numpy-2.4.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", size = 15720138, upload-time = "2026-01-31T23:11:48.082Z" }, + { url = "https://files.pythonhosted.org/packages/ea/5c/08887c54e68e1e28df53709f1893ce92932cc6f01f7c3d4dc952f61ffd4e/numpy-2.4.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", size = 16655398, upload-time = "2026-01-31T23:11:50.293Z" }, + { url = "https://files.pythonhosted.org/packages/4d/89/253db0fa0e66e9129c745e4ef25631dc37d5f1314dad2b53e907b8538e6d/numpy-2.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", size = 17079064, upload-time = "2026-01-31T23:11:52.927Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d5/cbade46ce97c59c6c3da525e8d95b7abe8a42974a1dc5c1d489c10433e88/numpy-2.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", size = 18379680, upload-time = "2026-01-31T23:11:55.22Z" }, + { url = "https://files.pythonhosted.org/packages/40/62/48f99ae172a4b63d981babe683685030e8a3df4f246c893ea5c6ef99f018/numpy-2.4.2-cp313-cp313t-win32.whl", hash = "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", size = 6082433, upload-time = "2026-01-31T23:11:58.096Z" }, + { url = "https://files.pythonhosted.org/packages/07/38/e054a61cfe48ad9f1ed0d188e78b7e26859d0b60ef21cd9de4897cdb5326/numpy-2.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", size = 12451181, upload-time = "2026-01-31T23:11:59.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/a4/a05c3a6418575e185dd84d0b9680b6bb2e2dc3e4202f036b7b4e22d6e9dc/numpy-2.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1", size = 10290756, upload-time = "2026-01-31T23:12:02.438Z" }, + { url = "https://files.pythonhosted.org/packages/18/88/b7df6050bf18fdcfb7046286c6535cabbdd2064a3440fca3f069d319c16e/numpy-2.4.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", size = 16663092, upload-time = "2026-01-31T23:12:04.521Z" }, + { url = "https://files.pythonhosted.org/packages/25/7a/1fee4329abc705a469a4afe6e69b1ef7e915117747886327104a8493a955/numpy-2.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", size = 14698770, upload-time = "2026-01-31T23:12:06.96Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0b/f9e49ba6c923678ad5bc38181c08ac5e53b7a5754dbca8e581aa1a56b1ff/numpy-2.4.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", size = 5208562, upload-time = "2026-01-31T23:12:09.632Z" }, + { url = "https://files.pythonhosted.org/packages/7d/12/d7de8f6f53f9bb76997e5e4c069eda2051e3fe134e9181671c4391677bb2/numpy-2.4.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", size = 6543710, upload-time = "2026-01-31T23:12:11.969Z" }, + { url = "https://files.pythonhosted.org/packages/09/63/c66418c2e0268a31a4cf8a8b512685748200f8e8e8ec6c507ce14e773529/numpy-2.4.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", size = 15677205, upload-time = "2026-01-31T23:12:14.33Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/6c/7f237821c9642fb2a04d2f1e88b4295677144ca93285fd76eff3bcba858d/numpy-2.4.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", size = 16611738, upload-time = "2026-01-31T23:12:16.525Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a7/39c4cdda9f019b609b5c473899d87abff092fc908cfe4d1ecb2fcff453b0/numpy-2.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", size = 17028888, upload-time = "2026-01-31T23:12:19.306Z" }, + { url = "https://files.pythonhosted.org/packages/da/b3/e84bb64bdfea967cc10950d71090ec2d84b49bc691df0025dddb7c26e8e3/numpy-2.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", size = 18339556, upload-time = "2026-01-31T23:12:21.816Z" }, + { url = "https://files.pythonhosted.org/packages/88/f5/954a291bc1192a27081706862ac62bb5920fbecfbaa302f64682aa90beed/numpy-2.4.2-cp314-cp314-win32.whl", hash = "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", size = 6006899, upload-time = "2026-01-31T23:12:24.14Z" }, + { url = "https://files.pythonhosted.org/packages/05/cb/eff72a91b2efdd1bc98b3b8759f6a1654aa87612fc86e3d87d6fe4f948c4/numpy-2.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", size = 12443072, upload-time = "2026-01-31T23:12:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/37/75/62726948db36a56428fce4ba80a115716dc4fad6a3a4352487f8bb950966/numpy-2.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", size = 10494886, upload-time = "2026-01-31T23:12:28.488Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/2f/ee93744f1e0661dc267e4b21940870cabfae187c092e1433b77b09b50ac4/numpy-2.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", size = 14818567, upload-time = "2026-01-31T23:12:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/6535212add7d76ff938d8bdc654f53f88d35cddedf807a599e180dcb8e66/numpy-2.4.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", size = 5328372, upload-time = "2026-01-31T23:12:32.962Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9d/c48f0a035725f925634bf6b8994253b43f2047f6778a54147d7e213bc5a7/numpy-2.4.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", size = 6649306, upload-time = "2026-01-31T23:12:34.797Z" }, + { url = "https://files.pythonhosted.org/packages/81/05/7c73a9574cd4a53a25907bad38b59ac83919c0ddc8234ec157f344d57d9a/numpy-2.4.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", size = 15722394, upload-time = "2026-01-31T23:12:36.565Z" }, + { url = "https://files.pythonhosted.org/packages/35/fa/4de10089f21fc7d18442c4a767ab156b25c2a6eaf187c0db6d9ecdaeb43f/numpy-2.4.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", size = 16653343, upload-time = "2026-01-31T23:12:39.188Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f9/d33e4ffc857f3763a57aa85650f2e82486832d7492280ac21ba9efda80da/numpy-2.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", size = 17078045, upload-time = "2026-01-31T23:12:42.041Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/b8/54bdb43b6225badbea6389fa038c4ef868c44f5890f95dd530a218706da3/numpy-2.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", size = 18380024, upload-time = "2026-01-31T23:12:44.331Z" }, + { url = "https://files.pythonhosted.org/packages/a5/55/6e1a61ded7af8df04016d81b5b02daa59f2ea9252ee0397cb9f631efe9e5/numpy-2.4.2-cp314-cp314t-win32.whl", hash = "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", size = 6153937, upload-time = "2026-01-31T23:12:47.229Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/fa6118d1ed6d776b0983f3ceac9b1a5558e80df9365b1c3aa6d42bf9eee4/numpy-2.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", size = 12631844, upload-time = "2026-01-31T23:12:48.997Z" }, + { url = "https://files.pythonhosted.org/packages/32/0a/2ec5deea6dcd158f254a7b372fb09cfba5719419c8d66343bab35237b3fb/numpy-2.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", size = 10565379, upload-time = "2026-01-31T23:12:51.345Z" }, +] + +[[package]] +name = "odfpy" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/73/8ade73f6749177003f7ce3304f524774adda96e6aaab30ea79fd8fda7934/odfpy-1.4.1.tar.gz", hash = "sha256:db766a6e59c5103212f3cc92ec8dd50a0f3a02790233ed0b52148b70d3c438ec", size = 717045, upload-time = "2020-01-18T16:55:48.852Z" } + +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = 
"sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "packaging" version = "26.0" @@ -91,6 +652,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] +[[package]] +name = "platformdirs" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -100,6 +670,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.38.1" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/5e/208a24471a433bcd0e9a6889ac49025fd4daad2815c8220c5bd2576e5f1b/polars-1.38.1.tar.gz", hash = "sha256:803a2be5344ef880ad625addfb8f641995cfd777413b08a10de0897345778239", size = 717667, upload-time = "2026-02-06T18:13:23.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/49/737c1a6273c585719858261753da0b688454d1b634438ccba8a9c4eb5aab/polars-1.38.1-py3-none-any.whl", hash = "sha256:a29479c48fed4984d88b656486d221f638cba45d3e961631a50ee5fdde38cb2c", size = 810368, upload-time = "2026-02-06T18:11:55.819Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.38.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/07/4b/04d6b3fb7cf336fbe12fbc4b43f36d1783e11bb0f2b1e3980ec44878df06/polars_runtime_32-1.38.1.tar.gz", hash = "sha256:04f20ed1f5c58771f34296a27029dc755a9e4b1390caeaef8f317e06fdfce2ec", size = 2812631, upload-time = "2026-02-06T18:13:25.206Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/a2/a00defbddadd8cf1042f52380dcba6b6592b03bac8e3b34c436b62d12d3b/polars_runtime_32-1.38.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:18154e96044724a0ac38ce155cf63aa03c02dd70500efbbf1a61b08cadd269ef", size = 44108001, upload-time = "2026-02-06T18:11:58.127Z" }, + { url = "https://files.pythonhosted.org/packages/a7/fb/599ff3709e6a303024efd7edfd08cf8de55c6ac39527d8f41cbc4399385f/polars_runtime_32-1.38.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c49acac34cc4049ed188f1eb67d6ff3971a39b4af7f7b734b367119970f313ac", size = 40230140, upload-time = "2026-02-06T18:12:01.181Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8c/3ac18d6f89dc05fe2c7c0ee1dc5b81f77a5c85ad59898232c2500fe2ebbf/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fef2ef2626a954e010e006cc8e4de467ecf32d08008f130cea1c78911f545323", size = 41994039, upload-time = "2026-02-06T18:12:04.332Z" }, + { url = "https://files.pythonhosted.org/packages/f2/5a/61d60ec5cc0ab37cbd5a699edb2f9af2875b7fdfdfb2a4608ca3cc5f0448/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a5f7a8125e2d50e2e060296551c929aec09be23a9edcb2b12ca923f555a5ba", size = 45755804, upload-time = "2026-02-06T18:12:07.846Z" }, + { url = "https://files.pythonhosted.org/packages/91/54/02cd4074c98c361ccd3fec3bcb0bd68dbc639c0550c42a4436b0ff0f3ccf/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:10d19cd9863e129273b18b7fcaab625b5c8143c2d22b3e549067b78efa32e4fa", size = 42159605, upload-time = "2026-02-06T18:12:10.919Z" }, + { url = "https://files.pythonhosted.org/packages/8e/f3/b2a5e720cc56eaa38b4518e63aa577b4bbd60e8b05a00fe43ca051be5879/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61e8d73c614b46a00d2f853625a7569a2e4a0999333e876354ac81d1bf1bb5e2", size = 45336615, upload-time = "2026-02-06T18:12:14.074Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8d/ee2e4b7de948090cfb3df37d401c521233daf97bfc54ddec5d61d1d31618/polars_runtime_32-1.38.1-cp310-abi3-win_amd64.whl", hash = "sha256:08c2b3b93509c1141ac97891294ff5c5b0c548a373f583eaaea873a4bf506437", size = 45680732, upload-time = "2026-02-06T18:12:19.097Z" }, + { url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" }, +] + +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = 
"virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + [[package]] name = "psutil" version = "6.1.1" @@ -226,6 +840,237 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-recording" +version = "0.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "vcrpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/32/9c/f4027c5f1693847b06d11caf4b4f6bb09f22c1581ada4663877ec166b8c6/pytest_recording-0.13.4.tar.gz", hash = "sha256:568d64b2a85992eec4ae0a419c855d5fd96782c5fb016784d86f18053792768c", size = 26576, upload-time = "2025-05-08T10:41:11.231Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/c2/ce34735972cc42d912173e79f200fe66530225190c06655c5632a9d88f1e/pytest_recording-0.13.4-py3-none-any.whl", hash = "sha256:ad49a434b51b1c4f78e85b1e6b74fdcc2a0a581ca16e52c798c6ace971f7f439", size = 13723, upload-time = "2025-05-08T10:41:09.684Z" }, +] + +[[package]] +name = "python-gitlab" +version = "6.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "requests-toolbelt" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9a/bd/b30f1d3b303cb5d3c72e2d57a847d699e8573cbdfd67ece5f1795e49da1c/python_gitlab-6.5.0.tar.gz", hash = "sha256:97553652d94b02de343e9ca92782239aa2b5f6594c5482331a9490d9d5e8737d", size = 400591, upload-time = "2025-10-17T21:40:02.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/bd/b0d440685fbcafee462bed793a74aea88541887c4c30556a55ac64914b8d/python_gitlab-6.5.0-py3-none-any.whl", hash = "sha256:494e1e8e5edd15286eaf7c286f3a06652688f1ee20a49e2a0218ddc5cc475e32", size = 144419, upload-time = "2025-10-17T21:40:01.233Z" }, +] + +[[package]] +name = "python-semantic-release" +version = "10.5.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "click-option-group" }, + { name = "deprecated" }, + { name = "dotty-dict" }, + { name = "gitpython" }, + { name = "importlib-resources" }, + { name = "jinja2" }, + { name = "pydantic" }, + { name = "python-gitlab" }, + { name = "requests" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "tomlkit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/3a/7332b822825ed0e902c6e950e0d1e90e8f666fd12eb27855d1c8b6677eff/python_semantic_release-10.5.3.tar.gz", hash = "sha256:de4da78635fa666e5774caaca2be32063cae72431eb75e2ac23b9f2dfd190785", size = 618034, upload-time = "2025-12-14T22:37:29.782Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/01/ada29a1215df601bded0a2efd3b6d53864a0a9e0a9ea52aeaebe14fd03fd/python_semantic_release-10.5.3-py3-none-any.whl", hash = "sha256:1be0e07c36fa1f1ec9da4f438c1f6bbd7bc10eb0d6ac0089b0643103708c2823", size = 152716, upload-time = "2025-12-14T22:37:28.089Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = 
"sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", 
hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url 
= "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + +[[package]] +name = "rich" +version = "14.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/99/a4cab2acbb884f80e558b0771e97e21e939c5dfb460f488d19df485e8298/rich-14.3.2.tar.gz", hash = "sha256:e712f11c1a562a11843306f5ed999475f09ac31ffb64281f73ab29ffdda8b3b8", size = 230143, upload-time = "2026-02-01T16:20:47.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/45/615f5babd880b4bd7d405cc0dc348234c5ffb6ed1ea33e152ede08b2072d/rich-14.3.2-py3-none-any.whl", hash = "sha256:08e67c3e90884651da3239ea668222d19bea7b589149d8014a21c633420dbb69", size = 309963, upload-time = "2026-02-01T16:20:46.078Z" }, +] + +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, +] + [[package]] name = "ruff" version = "0.15.0" @@ -251,6 +1096,75 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f6/b0/2d823f6e77ebe560f4e397d078487e8d52c1516b331e3521bc75db4272ca/ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a", size = 10865753, upload-time = "2026-02-03T17:53:03.014Z" }, ] +[[package]] +name = "shapely" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" }, + { url = "https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, 
upload-time = "2025-09-24T13:50:32.291Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" }, + { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/90/98ef257c23c46425dc4d1d31005ad7c8d649fe423a38b917db02c30f1f5a/shapely-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b510dda1a3672d6879beb319bc7c5fd302c6c354584690973c838f46ec3e0fa8", size = 1832644, upload-time = "2025-09-24T13:50:44.886Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ab/0bee5a830d209adcd3a01f2d4b70e587cdd9fd7380d5198c064091005af8/shapely-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8cff473e81017594d20ec55d86b54bc635544897e13a7cfc12e36909c5309a2a", size = 1642887, upload-time = "2025-09-24T13:50:46.735Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5e/7d7f54ba960c13302584c73704d8c4d15404a51024631adb60b126a4ae88/shapely-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe7b77dc63d707c09726b7908f575fc04ff1d1ad0f3fb92aec212396bc6cfe5e", size = 2970931, upload-time = "2025-09-24T13:50:48.374Z" }, + { url = "https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ed1a5bbfb386ee8332713bf7508bc24e32d24b74fc9a7b9f8529a55db9f4ee6", size = 3082855, upload-time = "2025-09-24T13:50:50.037Z" }, + { url = "https://files.pythonhosted.org/packages/44/2b/578faf235a5b09f16b5f02833c53822294d7f21b242f8e2d0cf03fb64321/shapely-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a84e0582858d841d54355246ddfcbd1fce3179f185da7470f41ce39d001ee1af", size = 3979960, upload-time = "2025-09-24T13:50:51.74Z" }, + { url = "https://files.pythonhosted.org/packages/4d/04/167f096386120f692cc4ca02f75a17b961858997a95e67a3cb6a7bbd6b53/shapely-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc3487447a43d42adcdf52d7ac73804f2312cbfa5d433a7d2c506dcab0033dfd", size = 4142851, upload-time = "2025-09-24T13:50:53.49Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/74/fb402c5a6235d1c65a97348b48cdedb75fb19eca2b1d66d04969fc1c6091/shapely-2.1.2-cp313-cp313-win32.whl", hash = "sha256:9c3a3c648aedc9f99c09263b39f2d8252f199cb3ac154fadc173283d7d111350", size = 1541890, upload-time = "2025-09-24T13:50:55.337Z" }, + { url = "https://files.pythonhosted.org/packages/41/47/3647fe7ad990af60ad98b889657a976042c9988c2807cf322a9d6685f462/shapely-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:ca2591bff6645c216695bdf1614fca9c82ea1144d4a7591a466fef64f28f0715", size = 1722151, upload-time = "2025-09-24T13:50:57.153Z" }, + { url = "https://files.pythonhosted.org/packages/3c/49/63953754faa51ffe7d8189bfbe9ca34def29f8c0e34c67cbe2a2795f269d/shapely-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2d93d23bdd2ed9dc157b46bc2f19b7da143ca8714464249bef6771c679d5ff40", size = 1834130, upload-time = "2025-09-24T13:50:58.49Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ee/dce001c1984052970ff60eb4727164892fb2d08052c575042a47f5a9e88f/shapely-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01d0d304b25634d60bd7cf291828119ab55a3bab87dc4af1e44b07fb225f188b", size = 1642802, upload-time = "2025-09-24T13:50:59.871Z" }, + { url = "https://files.pythonhosted.org/packages/da/e7/fc4e9a19929522877fa602f705706b96e78376afb7fad09cad5b9af1553c/shapely-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8d8382dd120d64b03698b7298b89611a6ea6f55ada9d39942838b79c9bc89801", size = 3018460, upload-time = "2025-09-24T13:51:02.08Z" }, + { url = "https://files.pythonhosted.org/packages/a1/18/7519a25db21847b525696883ddc8e6a0ecaa36159ea88e0fef11466384d0/shapely-2.1.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:19efa3611eef966e776183e338b2d7ea43569ae99ab34f8d17c2c054d3205cc0", size = 3095223, upload-time = "2025-09-24T13:51:04.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/de/b59a620b1f3a129c3fecc2737104a0a7e04e79335bd3b0a1f1609744cf17/shapely-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:346ec0c1a0fcd32f57f00e4134d1200e14bf3f5ae12af87ba83ca275c502498c", size = 4030760, upload-time = "2025-09-24T13:51:06.455Z" }, + { url = "https://files.pythonhosted.org/packages/96/b3/c6655ee7232b417562bae192ae0d3ceaadb1cc0ffc2088a2ddf415456cc2/shapely-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6305993a35989391bd3476ee538a5c9a845861462327efe00dd11a5c8c709a99", size = 4170078, upload-time = "2025-09-24T13:51:08.584Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8e/605c76808d73503c9333af8f6cbe7e1354d2d238bda5f88eea36bfe0f42a/shapely-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:c8876673449f3401f278c86eb33224c5764582f72b653a415d0e6672fde887bf", size = 1559178, upload-time = "2025-09-24T13:51:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/36/f7/d317eb232352a1f1444d11002d477e54514a4a6045536d49d0c59783c0da/shapely-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:4a44bc62a10d84c11a7a3d7c1c4fe857f7477c3506e24c9062da0db0ae0c449c", size = 1739756, upload-time = "2025-09-24T13:51:12.105Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c4/3ce4c2d9b6aabd27d26ec988f08cb877ba9e6e96086eff81bfea93e688c7/shapely-2.1.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:9a522f460d28e2bf4e12396240a5fc1518788b2fcd73535166d748399ef0c223", size = 1831290, upload-time = "2025-09-24T13:51:13.56Z" }, + { url = "https://files.pythonhosted.org/packages/17/b9/f6ab8918fc15429f79cb04afa9f9913546212d7fb5e5196132a2af46676b/shapely-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ff629e00818033b8d71139565527ced7d776c269a49bd78c9df84e8f852190c", size = 1641463, upload-time = "2025-09-24T13:51:14.972Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/57/91d59ae525ca641e7ac5551c04c9503aee6f29b92b392f31790fcb1a4358/shapely-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f67b34271dedc3c653eba4e3d7111aa421d5be9b4c4c7d38d30907f796cb30df", size = 2970145, upload-time = "2025-09-24T13:51:16.961Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cb/4948be52ee1da6927831ab59e10d4c29baa2a714f599f1f0d1bc747f5777/shapely-2.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21952dc00df38a2c28375659b07a3979d22641aeb104751e769c3ee825aadecf", size = 3073806, upload-time = "2025-09-24T13:51:18.712Z" }, + { url = "https://files.pythonhosted.org/packages/03/83/f768a54af775eb41ef2e7bec8a0a0dbe7d2431c3e78c0a8bdba7ab17e446/shapely-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1f2f33f486777456586948e333a56ae21f35ae273be99255a191f5c1fa302eb4", size = 3980803, upload-time = "2025-09-24T13:51:20.37Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cb/559c7c195807c91c79d38a1f6901384a2878a76fbdf3f1048893a9b7534d/shapely-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cf831a13e0d5a7eb519e96f58ec26e049b1fad411fc6fc23b162a7ce04d9cffc", size = 4133301, upload-time = "2025-09-24T13:51:21.887Z" }, + { url = "https://files.pythonhosted.org/packages/80/cd/60d5ae203241c53ef3abd2ef27c6800e21afd6c94e39db5315ea0cbafb4a/shapely-2.1.2-cp314-cp314-win32.whl", hash = "sha256:61edcd8d0d17dd99075d320a1dd39c0cb9616f7572f10ef91b4b5b00c4aeb566", size = 1583247, upload-time = "2025-09-24T13:51:23.401Z" }, + { url = "https://files.pythonhosted.org/packages/74/d4/135684f342e909330e50d31d441ace06bf83c7dc0777e11043f99167b123/shapely-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:a444e7afccdb0999e203b976adb37ea633725333e5b119ad40b1ca291ecf311c", size = 1773019, upload-time = "2025-09-24T13:51:24.873Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/05/a44f3f9f695fa3ada22786dc9da33c933da1cbc4bfe876fe3a100bafe263/shapely-2.1.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5ebe3f84c6112ad3d4632b1fd2290665aa75d4cef5f6c5d77c4c95b324527c6a", size = 1834137, upload-time = "2025-09-24T13:51:26.665Z" }, + { url = "https://files.pythonhosted.org/packages/52/7e/4d57db45bf314573427b0a70dfca15d912d108e6023f623947fa69f39b72/shapely-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5860eb9f00a1d49ebb14e881f5caf6c2cf472c7fd38bd7f253bbd34f934eb076", size = 1642884, upload-time = "2025-09-24T13:51:28.029Z" }, + { url = "https://files.pythonhosted.org/packages/5a/27/4e29c0a55d6d14ad7422bf86995d7ff3f54af0eba59617eb95caf84b9680/shapely-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b705c99c76695702656327b819c9660768ec33f5ce01fa32b2af62b56ba400a1", size = 3018320, upload-time = "2025-09-24T13:51:29.903Z" }, + { url = "https://files.pythonhosted.org/packages/9f/bb/992e6a3c463f4d29d4cd6ab8963b75b1b1040199edbd72beada4af46bde5/shapely-2.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a1fd0ea855b2cf7c9cddaf25543e914dd75af9de08785f20ca3085f2c9ca60b0", size = 3094931, upload-time = "2025-09-24T13:51:32.699Z" }, + { url = "https://files.pythonhosted.org/packages/9c/16/82e65e21070e473f0ed6451224ed9fa0be85033d17e0c6e7213a12f59d12/shapely-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:df90e2db118c3671a0754f38e36802db75fe0920d211a27481daf50a711fdf26", size = 4030406, upload-time = "2025-09-24T13:51:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/7c/75/c24ed871c576d7e2b64b04b1fe3d075157f6eb54e59670d3f5ffb36e25c7/shapely-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:361b6d45030b4ac64ddd0a26046906c8202eb60d0f9f53085f5179f1d23021a0", size = 4169511, upload-time = "2025-09-24T13:51:36.297Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/f7/b3d1d6d18ebf55236eec1c681ce5e665742aab3c0b7b232720a7d43df7b6/shapely-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:b54df60f1fbdecc8ebc2c5b11870461a6417b3d617f555e5033f1505d36e5735", size = 1602607, upload-time = "2025-09-24T13:51:37.757Z" }, + { url = "https://files.pythonhosted.org/packages/9a/f6/f09272a71976dfc138129b8faf435d064a811ae2f708cb147dccdf7aacdb/shapely-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:0036ac886e0923417932c2e6369b6c52e38e0ff5d9120b90eef5cd9a5fc5cae9", size = 1796682, upload-time = "2025-09-24T13:51:39.233Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, +] + 
[[package]] name = "taskipy" version = "1.14.1" @@ -311,6 +1225,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, ] +[[package]] +name = "tomlkit" +version = "0.13.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, +] + [[package]] name = "ty" version = "0.0.15" @@ -335,6 +1258,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/be/1991f2bc12847ae2d4f1e3ac5dcff8bb7bc1261390645c0755bb55616355/ty-0.0.15-py3-none-win_arm64.whl", hash = "sha256:e5a98d4119e77d6136461e16ae505f8f8069002874ab073de03fbcb1a5e8bf25", size = 9937490, upload-time = "2026-02-05T01:06:32.388Z" }, ] +[[package]] +name = "typer" +version = "0.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7e/e6/44e073787aa57cd71c151f44855232feb0f748428fd5242d7366e3c4ae8b/typer-0.23.0.tar.gz", hash = "sha256:d8378833e47ada5d3d093fa20c4c63427cc4e27127f6b349a6c359463087d8cc", size = 120181, upload-time = "2026-02-11T15:22:18.637Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7a/ed/d6fca788b51d0d4640c4bc82d0e85bad4b49809bca36bf4af01b4dcb66a7/typer-0.23.0-py3-none-any.whl", hash = "sha256:79f4bc262b6c37872091072a3cb7cb6d7d79ee98c0c658b4364bdcde3c42c913", size = 56668, upload-time = "2026-02-11T15:22:21.075Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -355,3 +1293,102 @@ sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] + +[[package]] +name = "tzdata" +version = "2025.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "vcrpy" +version = "8.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/07/bcfd5ebd7cb308026ab78a353e091bd699593358be49197d39d004e5ad83/vcrpy-8.1.1.tar.gz", hash = "sha256:58e3053e33b423f3594031cb758c3f4d1df931307f1e67928e30cf352df7709f", size = 85770, upload-time = "2026-01-04T19:22:03.886Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/d7/f79b05a5d728f8786876a7d75dfb0c5cae27e428081b2d60152fb52f155f/vcrpy-8.1.1-py3-none-any.whl", hash = "sha256:2d16f31ad56493efb6165182dd99767207031b0da3f68b18f975545ede8ac4b9", size = 42445, upload-time = "2026-01-04T19:22:02.532Z" }, +] + +[[package]] +name = "virtualenv" +version = "20.36.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/a3/4d310fa5f00863544e1d0f4de93bddec248499ccf97d4791bc3122c9d4f3/virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba", size = 6032239, upload-time = "2026-01-09T18:21:01.296Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258, upload-time = "2026-01-09T18:20:59.425Z" }, +] + +[[package]] +name = "wrapt" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f7/37/ae31f40bec90de2f88d9597d0b5281e23ffe85b893a47ca5d9c05c63a4f6/wrapt-2.1.1.tar.gz", hash = "sha256:5fdcb09bf6db023d88f312bd0767594b414655d58090fc1c46b3414415f67fac", size = 81329, upload-time = "2026-02-03T02:12:13.786Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/cb/4d5255d19bbd12be7f8ee2c1fb4269dddec9cef777ef17174d357468efaa/wrapt-2.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab8e3793b239db021a18782a5823fcdea63b9fe75d0e340957f5828ef55fcc02", size = 61143, upload-time = "2026-02-03T02:11:46.313Z" }, + { url = "https://files.pythonhosted.org/packages/6f/07/7ed02daa35542023464e3c8b7cb937fa61f6c61c0361ecf8f5fecf8ad8da/wrapt-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c0300007836373d1c2df105b40777986accb738053a92fe09b615a7a4547e9f", size = 61740, upload-time = "2026-02-03T02:12:51.966Z" }, + { url = "https://files.pythonhosted.org/packages/c4/60/a237a4e4a36f6d966061ccc9b017627d448161b19e0a3ab80a7c7c97f859/wrapt-2.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2b27c070fd1132ab23957bcd4ee3ba707a91e653a9268dc1afbd39b77b2799f7", size = 121327, upload-time = "2026-02-03T02:11:06.796Z" }, + { url = "https://files.pythonhosted.org/packages/ae/fe/9139058a3daa8818fc67e6460a2340e8bbcf3aef8b15d0301338bbe181ca/wrapt-2.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b0e36d845e8b6f50949b6b65fc6cd279f47a1944582ed4ec8258cd136d89a64", size = 122903, upload-time = "2026-02-03T02:12:48.657Z" }, + { url = "https://files.pythonhosted.org/packages/91/10/b8479202b4164649675846a531763531f0a6608339558b5a0a718fc49a8d/wrapt-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4aeea04a9889370fcfb1ef828c4cc583f36a875061505cd6cd9ba24d8b43cc36", size = 121333, upload-time = "2026-02-03T02:11:32.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/75/75fc793b791d79444aca2c03ccde64e8b99eda321b003f267d570b7b0985/wrapt-2.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d88b46bb0dce9f74b6817bc1758ff2125e1ca9e1377d62ea35b6896142ab6825", size = 120458, upload-time = "2026-02-03T02:11:16.039Z" }, + { url = "https://files.pythonhosted.org/packages/d7/8f/c3f30d511082ca6d947c405f9d8f6c8eaf83cfde527c439ec2c9a30eb5ea/wrapt-2.1.1-cp312-cp312-win32.whl", hash = "sha256:63decff76ca685b5c557082dfbea865f3f5f6d45766a89bff8dc61d336348833", size = 58086, upload-time = "2026-02-03T02:12:35.041Z" }, + { url = "https://files.pythonhosted.org/packages/0a/c8/37625b643eea2849f10c3b90f69c7462faa4134448d4443234adaf122ae5/wrapt-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:b828235d26c1e35aca4107039802ae4b1411be0fe0367dd5b7e4d90e562fcbcd", size = 60328, upload-time = "2026-02-03T02:12:45.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/79/56242f07572d5682ba8065a9d4d9c2218313f576e3c3471873c2a5355ffd/wrapt-2.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:75128507413a9f1bcbe2db88fd18fbdbf80f264b82fa33a6996cdeaf01c52352", size = 58722, upload-time = "2026-02-03T02:12:27.949Z" }, + { url = "https://files.pythonhosted.org/packages/f7/ca/3cf290212855b19af9fcc41b725b5620b32f470d6aad970c2593500817eb/wrapt-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9646e17fa7c3e2e7a87e696c7de66512c2b4f789a8db95c613588985a2e139", size = 61150, upload-time = "2026-02-03T02:12:50.575Z" }, + { url = "https://files.pythonhosted.org/packages/9d/33/5b8f89a82a9859ce82da4870c799ad11ce15648b6e1c820fec3e23f4a19f/wrapt-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:428cfc801925454395aa468ba7ddb3ed63dc0d881df7b81626cdd433b4e2b11b", size = 61743, upload-time = "2026-02-03T02:11:55.733Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/2f/60c51304fbdf47ce992d9eefa61fbd2c0e64feee60aaa439baf42ea6f40b/wrapt-2.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5797f65e4d58065a49088c3b32af5410751cd485e83ba89e5a45e2aa8905af98", size = 121341, upload-time = "2026-02-03T02:11:20.461Z" }, + { url = "https://files.pythonhosted.org/packages/ad/03/ce5256e66dd94e521ad5e753c78185c01b6eddbed3147be541f4d38c0cb7/wrapt-2.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a2db44a71202c5ae4bb5f27c6d3afbc5b23053f2e7e78aa29704541b5dad789", size = 122947, upload-time = "2026-02-03T02:11:33.596Z" }, + { url = "https://files.pythonhosted.org/packages/eb/ae/50ca8854b81b946a11a36fcd6ead32336e6db2c14b6e4a8b092b80741178/wrapt-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8d5350c3590af09c1703dd60ec78a7370c0186e11eaafb9dda025a30eee6492d", size = 121370, upload-time = "2026-02-03T02:11:09.886Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d9/d6a7c654e0043319b4cc137a4caaf7aa16b46b51ee8df98d1060254705b7/wrapt-2.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d9b076411bed964e752c01b49fd224cc385f3a96f520c797d38412d70d08359", size = 120465, upload-time = "2026-02-03T02:11:37.592Z" }, + { url = "https://files.pythonhosted.org/packages/55/90/65be41e40845d951f714b5a77e84f377a3787b1e8eee6555a680da6d0db5/wrapt-2.1.1-cp313-cp313-win32.whl", hash = "sha256:0bb7207130ce6486727baa85373503bf3334cc28016f6928a0fa7e19d7ecdc06", size = 58090, upload-time = "2026-02-03T02:12:53.342Z" }, + { url = "https://files.pythonhosted.org/packages/5f/66/6a09e0294c4fc8c26028a03a15191721c9271672467cc33e6617ee0d91d2/wrapt-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:cbfee35c711046b15147b0ae7db9b976f01c9520e6636d992cd9e69e5e2b03b1", size = 60341, upload-time = "2026-02-03T02:12:36.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/f0/20ceb8b701e9a71555c87a5ddecbed76ec16742cf1e4b87bbaf26735f998/wrapt-2.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:7d2756061022aebbf57ba14af9c16e8044e055c22d38de7bf40d92b565ecd2b0", size = 58731, upload-time = "2026-02-03T02:12:01.328Z" }, + { url = "https://files.pythonhosted.org/packages/80/b4/fe95beb8946700b3db371f6ce25115217e7075ca063663b8cca2888ba55c/wrapt-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4814a3e58bc6971e46baa910ecee69699110a2bf06c201e24277c65115a20c20", size = 62969, upload-time = "2026-02-03T02:11:51.245Z" }, + { url = "https://files.pythonhosted.org/packages/b8/89/477b0bdc784e3299edf69c279697372b8bd4c31d9c6966eae405442899df/wrapt-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:106c5123232ab9b9f4903692e1fa0bdc231510098f04c13c3081f8ad71c3d612", size = 63606, upload-time = "2026-02-03T02:12:02.64Z" }, + { url = "https://files.pythonhosted.org/packages/ed/55/9d0c1269ab76de87715b3b905df54dd25d55bbffd0b98696893eb613469f/wrapt-2.1.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1a40b83ff2535e6e56f190aff123821eea89a24c589f7af33413b9c19eb2c738", size = 152536, upload-time = "2026-02-03T02:11:24.492Z" }, + { url = "https://files.pythonhosted.org/packages/44/18/2004766030462f79ad86efaa62000b5e39b1ff001dcce86650e1625f40ae/wrapt-2.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:789cea26e740d71cf1882e3a42bb29052bc4ada15770c90072cb47bf73fb3dbf", size = 158697, upload-time = "2026-02-03T02:12:32.214Z" }, + { url = "https://files.pythonhosted.org/packages/e1/bb/0a880fa0f35e94ee843df4ee4dd52a699c9263f36881311cfb412c09c3e5/wrapt-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ba49c14222d5e5c0ee394495a8655e991dc06cbca5398153aefa5ac08cd6ccd7", size = 155563, upload-time = "2026-02-03T02:11:49.737Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/ff/cd1b7c4846c8678fac359a6eb975dc7ab5bd606030adb22acc8b4a9f53f1/wrapt-2.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ac8cda531fe55be838a17c62c806824472bb962b3afa47ecbd59b27b78496f4e", size = 150161, upload-time = "2026-02-03T02:12:33.613Z" }, + { url = "https://files.pythonhosted.org/packages/38/ec/67c90a7082f452964b4621e4890e9a490f1add23cdeb7483cc1706743291/wrapt-2.1.1-cp313-cp313t-win32.whl", hash = "sha256:b8af75fe20d381dd5bcc9db2e86a86d7fcfbf615383a7147b85da97c1182225b", size = 59783, upload-time = "2026-02-03T02:11:39.863Z" }, + { url = "https://files.pythonhosted.org/packages/ec/08/466afe4855847d8febdfa2c57c87e991fc5820afbdef01a273683dfd15a0/wrapt-2.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:45c5631c9b6c792b78be2d7352129f776dd72c605be2c3a4e9be346be8376d83", size = 63082, upload-time = "2026-02-03T02:12:09.075Z" }, + { url = "https://files.pythonhosted.org/packages/9a/62/60b629463c28b15b1eeadb3a0691e17568622b12aa5bfa7ebe9b514bfbeb/wrapt-2.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:da815b9263947ac98d088b6414ac83507809a1d385e4632d9489867228d6d81c", size = 60251, upload-time = "2026-02-03T02:11:21.794Z" }, + { url = "https://files.pythonhosted.org/packages/95/a0/1c2396e272f91efe6b16a6a8bce7ad53856c8f9ae4f34ceaa711d63ec9e1/wrapt-2.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9aa1765054245bb01a37f615503290d4e207e3fd59226e78341afb587e9c1236", size = 61311, upload-time = "2026-02-03T02:12:44.41Z" }, + { url = "https://files.pythonhosted.org/packages/b0/9a/d2faba7e61072a7507b5722db63562fdb22f5a24e237d460d18755627f15/wrapt-2.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:feff14b63a6d86c1eee33a57f77573649f2550935981625be7ff3cb7342efe05", size = 61805, upload-time = "2026-02-03T02:11:59.905Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/56/073989deb4b5d7d6e7ea424476a4ae4bda02140f2dbeaafb14ba4864dd60/wrapt-2.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:81fc5f22d5fcfdbabde96bb3f5379b9f4476d05c6d524d7259dc5dfb501d3281", size = 120308, upload-time = "2026-02-03T02:12:04.46Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b6/84f37261295e38167a29eb82affaf1dc15948dc416925fe2091beee8e4ac/wrapt-2.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:951b228ecf66def855d22e006ab9a1fc12535111ae7db2ec576c728f8ddb39e8", size = 122688, upload-time = "2026-02-03T02:11:23.148Z" }, + { url = "https://files.pythonhosted.org/packages/ea/80/32db2eec6671f80c65b7ff175be61bc73d7f5223f6910b0c921bbc4bd11c/wrapt-2.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ddf582a95641b9a8c8bd643e83f34ecbbfe1b68bc3850093605e469ab680ae3", size = 121115, upload-time = "2026-02-03T02:12:39.068Z" }, + { url = "https://files.pythonhosted.org/packages/49/ef/dcd00383df0cd696614127902153bf067971a5aabcd3c9dcb2d8ef354b2a/wrapt-2.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fc5c500966bf48913f795f1984704e6d452ba2414207b15e1f8c339a059d5b16", size = 119484, upload-time = "2026-02-03T02:11:48.419Z" }, + { url = "https://files.pythonhosted.org/packages/76/29/0630280cdd2bd8f86f35cb6854abee1c9d6d1a28a0c6b6417cd15d378325/wrapt-2.1.1-cp314-cp314-win32.whl", hash = "sha256:4aa4baadb1f94b71151b8e44a0c044f6af37396c3b8bcd474b78b49e2130a23b", size = 58514, upload-time = "2026-02-03T02:11:58.616Z" }, + { url = "https://files.pythonhosted.org/packages/db/19/5bed84f9089ed2065f6aeda5dfc4f043743f642bc871454b261c3d7d322b/wrapt-2.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:860e9d3fd81816a9f4e40812f28be4439ab01f260603c749d14be3c0a1170d19", size = 60763, upload-time = "2026-02-03T02:12:24.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/cb/b967f2f9669e4249b4fe82e630d2a01bc6b9e362b9b12ed91bbe23ae8df4/wrapt-2.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3c59e103017a2c1ea0ddf589cbefd63f91081d7ce9d491d69ff2512bb1157e23", size = 59051, upload-time = "2026-02-03T02:11:29.602Z" }, + { url = "https://files.pythonhosted.org/packages/eb/19/6fed62be29f97eb8a56aff236c3f960a4b4a86e8379dc7046a8005901a97/wrapt-2.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9fa7c7e1bee9278fc4f5dd8275bc8d25493281a8ec6c61959e37cc46acf02007", size = 63059, upload-time = "2026-02-03T02:12:06.368Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1c/b757fd0adb53d91547ed8fad76ba14a5932d83dde4c994846a2804596378/wrapt-2.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:39c35e12e8215628984248bd9c8897ce0a474be2a773db207eb93414219d8469", size = 63618, upload-time = "2026-02-03T02:12:23.197Z" }, + { url = "https://files.pythonhosted.org/packages/10/fe/e5ae17b1480957c7988d991b93df9f2425fc51f128cf88144d6a18d0eb12/wrapt-2.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:94ded4540cac9125eaa8ddf5f651a7ec0da6f5b9f248fe0347b597098f8ec14c", size = 152544, upload-time = "2026-02-03T02:11:43.915Z" }, + { url = "https://files.pythonhosted.org/packages/3e/cc/99aed210c6b547b8a6e4cb9d1425e4466727158a6aeb833aa7997e9e08dd/wrapt-2.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da0af328373f97ed9bdfea24549ac1b944096a5a71b30e41c9b8b53ab3eec04a", size = 158700, upload-time = "2026-02-03T02:12:30.684Z" }, + { url = "https://files.pythonhosted.org/packages/81/0e/d442f745f4957944d5f8ad38bc3a96620bfff3562533b87e486e979f3d99/wrapt-2.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4ad839b55f0bf235f8e337ce060572d7a06592592f600f3a3029168e838469d3", size = 155561, upload-time = "2026-02-03T02:11:28.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/ac/9891816280e0018c48f8dfd61b136af7b0dcb4a088895db2531acde5631b/wrapt-2.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0d89c49356e5e2a50fa86b40e0510082abcd0530f926cbd71cf25bee6b9d82d7", size = 150188, upload-time = "2026-02-03T02:11:57.053Z" }, + { url = "https://files.pythonhosted.org/packages/24/98/e2f273b6d70d41f98d0739aa9a269d0b633684a5fb17b9229709375748d4/wrapt-2.1.1-cp314-cp314t-win32.whl", hash = "sha256:f4c7dd22cf7f36aafe772f3d88656559205c3af1b7900adfccb70edeb0d2abc4", size = 60425, upload-time = "2026-02-03T02:11:35.007Z" }, + { url = "https://files.pythonhosted.org/packages/1e/06/b500bfc38a4f82d89f34a13069e748c82c5430d365d9e6b75afb3ab74457/wrapt-2.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f76bc12c583ab01e73ba0ea585465a41e48d968f6d1311b4daec4f8654e356e3", size = 63855, upload-time = "2026-02-03T02:12:15.47Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cc/5f6193c32166faee1d2a613f278608e6f3b95b96589d020f0088459c46c9/wrapt-2.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7ea74fc0bec172f1ae5f3505b6655c541786a5cabe4bbc0d9723a56ac32eb9b9", size = 60443, upload-time = "2026-02-03T02:11:30.869Z" }, + { url = "https://files.pythonhosted.org/packages/c4/da/5a086bf4c22a41995312db104ec2ffeee2cf6accca9faaee5315c790377d/wrapt-2.1.1-py3-none-any.whl", hash = "sha256:3b0f4629eb954394a3d7c7a1c8cca25f0b07cefe6aa8545e862e9778152de5b7", size = 43886, upload-time = "2026-02-03T02:11:45.048Z" }, +] diff --git a/website/.astro/collections/changelogs.schema.json b/website/.astro/collections/changelogs.schema.json new file mode 100644 index 0000000..3524d08 --- /dev/null +++ b/website/.astro/collections/changelogs.schema.json @@ -0,0 +1,66 @@ +{ + "$ref": "#/definitions/changelogs", + "definitions": { + "changelogs": { + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "date": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "format": 
"date" + }, + { + "type": "integer", + "format": "unix-time" + } + ] + }, + "link": { + "type": "string", + "format": "uri" + }, + "provider": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "label": { + "type": "string" + } + }, + "required": [ + "name", + "label" + ], + "additionalProperties": false + }, + "slug": { + "type": "string" + }, + "title": { + "type": "string" + }, + "$schema": { + "type": "string" + } + }, + "required": [ + "base", + "provider", + "slug", + "title" + ], + "additionalProperties": false + } + }, + "$schema": "http://json-schema.org/draft-07/schema#" +} \ No newline at end of file diff --git a/website/.astro/collections/docs.schema.json b/website/.astro/collections/docs.schema.json new file mode 100644 index 0000000..9500aa0 --- /dev/null +++ b/website/.astro/collections/docs.schema.json @@ -0,0 +1,646 @@ +{ + "$ref": "#/definitions/docs", + "definitions": { + "docs": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "editUrl": { + "anyOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "boolean" + } + ], + "default": true + }, + "head": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "enum": [ + "title", + "base", + "link", + "style", + "meta", + "script", + "noscript", + "template" + ] + }, + "attrs": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + }, + { + "not": {} + } + ] + } + }, + "content": { + "type": "string" + } + }, + "required": [ + "tag" + ], + "additionalProperties": false + }, + "default": [] + }, + "tableOfContents": { + "anyOf": [ + { + "type": "object", + "properties": { + "minHeadingLevel": { + "type": "integer", + "minimum": 1, + "maximum": 6, + "default": 2 + }, + "maxHeadingLevel": { + "type": "integer", + "minimum": 1, + "maximum": 6, + "default": 3 + } + }, + 
"additionalProperties": false + }, + { + "type": "boolean" + } + ], + "default": { + "minHeadingLevel": 2, + "maxHeadingLevel": 3 + } + }, + "template": { + "type": "string", + "enum": [ + "doc", + "splash" + ], + "default": "doc" + }, + "hero": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "tagline": { + "type": "string" + }, + "image": { + "anyOf": [ + { + "type": "object", + "properties": { + "alt": { + "type": "string", + "default": "" + }, + "file": { + "type": "string" + } + }, + "required": [ + "file" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "alt": { + "type": "string", + "default": "" + }, + "dark": { + "type": "string" + }, + "light": { + "type": "string" + } + }, + "required": [ + "dark", + "light" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "html": { + "type": "string" + } + }, + "required": [ + "html" + ], + "additionalProperties": false + } + ] + }, + "actions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "link": { + "type": "string" + }, + "variant": { + "type": "string", + "enum": [ + "primary", + "secondary", + "minimal" + ], + "default": "primary" + }, + "icon": { + "anyOf": [ + { + "type": "string", + "enum": [ + "up-caret", + "down-caret", + "right-caret", + "left-caret", + "up-arrow", + "down-arrow", + "right-arrow", + "left-arrow", + "bars", + "translate", + "pencil", + "pen", + "document", + "add-document", + "setting", + "external", + "download", + "cloud-download", + "moon", + "sun", + "laptop", + "open-book", + "information", + "magnifier", + "forward-slash", + "close", + "error", + "warning", + "approve-check-circle", + "approve-check", + "rocket", + "star", + "puzzle", + "list-format", + "random", + "comment", + "comment-alt", + "heart", + "github", + "gitlab", + "bitbucket", + "codePen", + "farcaster", + "discord", + "gitter", + "twitter", + "x.com", + 
"mastodon", + "codeberg", + "youtube", + "threads", + "linkedin", + "twitch", + "azureDevOps", + "microsoftTeams", + "instagram", + "stackOverflow", + "telegram", + "rss", + "facebook", + "email", + "phone", + "reddit", + "patreon", + "signal", + "slack", + "matrix", + "hackerOne", + "openCollective", + "blueSky", + "discourse", + "zulip", + "pinterest", + "tiktok", + "astro", + "alpine", + "pnpm", + "biome", + "bun", + "mdx", + "apple", + "linux", + "homebrew", + "nix", + "starlight", + "pkl", + "node", + "cloudflare", + "vercel", + "netlify", + "deno", + "jsr", + "nostr", + "backstage", + "confluence", + "jira", + "storybook", + "vscode", + "jetbrains", + "zed", + "vim", + "figma", + "sketch", + "npm", + "sourcehut", + "substack", + "seti:folder", + "seti:bsl", + "seti:mdo", + "seti:salesforce", + "seti:asm", + "seti:bicep", + "seti:bazel", + "seti:c", + "seti:c-sharp", + "seti:html", + "seti:cpp", + "seti:clojure", + "seti:coldfusion", + "seti:config", + "seti:crystal", + "seti:crystal_embedded", + "seti:json", + "seti:css", + "seti:csv", + "seti:xls", + "seti:cu", + "seti:cake", + "seti:cake_php", + "seti:d", + "seti:word", + "seti:elixir", + "seti:elixir_script", + "seti:hex", + "seti:elm", + "seti:favicon", + "seti:f-sharp", + "seti:git", + "seti:go", + "seti:godot", + "seti:gradle", + "seti:grails", + "seti:graphql", + "seti:hacklang", + "seti:haml", + "seti:mustache", + "seti:haskell", + "seti:haxe", + "seti:jade", + "seti:java", + "seti:javascript", + "seti:jinja", + "seti:julia", + "seti:karma", + "seti:kotlin", + "seti:dart", + "seti:liquid", + "seti:livescript", + "seti:lua", + "seti:markdown", + "seti:argdown", + "seti:info", + "seti:clock", + "seti:maven", + "seti:nim", + "seti:github", + "seti:notebook", + "seti:nunjucks", + "seti:npm", + "seti:ocaml", + "seti:odata", + "seti:perl", + "seti:php", + "seti:pipeline", + "seti:pddl", + "seti:plan", + "seti:happenings", + "seti:powershell", + "seti:prisma", + "seti:pug", + "seti:puppet", + 
"seti:purescript", + "seti:python", + "seti:react", + "seti:rescript", + "seti:R", + "seti:ruby", + "seti:rust", + "seti:sass", + "seti:spring", + "seti:slim", + "seti:smarty", + "seti:sbt", + "seti:scala", + "seti:ethereum", + "seti:stylus", + "seti:svelte", + "seti:swift", + "seti:db", + "seti:terraform", + "seti:tex", + "seti:default", + "seti:twig", + "seti:typescript", + "seti:tsconfig", + "seti:vala", + "seti:vite", + "seti:vue", + "seti:wasm", + "seti:wat", + "seti:xml", + "seti:yml", + "seti:prolog", + "seti:zig", + "seti:zip", + "seti:wgt", + "seti:illustrator", + "seti:photoshop", + "seti:pdf", + "seti:font", + "seti:image", + "seti:svg", + "seti:sublime", + "seti:code-search", + "seti:shell", + "seti:video", + "seti:audio", + "seti:windows", + "seti:jenkins", + "seti:babel", + "seti:bower", + "seti:docker", + "seti:code-climate", + "seti:eslint", + "seti:firebase", + "seti:firefox", + "seti:gitlab", + "seti:grunt", + "seti:gulp", + "seti:ionic", + "seti:platformio", + "seti:rollup", + "seti:stylelint", + "seti:yarn", + "seti:webpack", + "seti:lock", + "seti:license", + "seti:makefile", + "seti:heroku", + "seti:todo", + "seti:ignored" + ] + }, + { + "type": "string", + "pattern": "^\\; + components: import('astro').MDXInstance<{}>['components']; + }>; + } +} + +declare module 'astro:content' { + export interface RenderResult { + Content: import('astro/runtime/server/index.js').AstroComponentFactory; + headings: import('astro').MarkdownHeading[]; + remarkPluginFrontmatter: Record; + } + interface Render { + '.md': Promise; + } + + export interface RenderedContent { + html: string; + metadata?: { + imagePaths: Array; + [key: string]: unknown; + }; + } +} + +declare module 'astro:content' { + type Flatten = T extends { [K: string]: infer U } ? 
U : never; + + export type CollectionKey = keyof AnyEntryMap; + export type CollectionEntry = Flatten; + + export type ContentCollectionKey = keyof ContentEntryMap; + export type DataCollectionKey = keyof DataEntryMap; + + type AllValuesOf = T extends any ? T[keyof T] : never; + type ValidContentEntrySlug = AllValuesOf< + ContentEntryMap[C] + >['slug']; + + export type ReferenceDataEntry< + C extends CollectionKey, + E extends keyof DataEntryMap[C] = string, + > = { + collection: C; + id: E; + }; + export type ReferenceContentEntry< + C extends keyof ContentEntryMap, + E extends ValidContentEntrySlug | (string & {}) = string, + > = { + collection: C; + slug: E; + }; + export type ReferenceLiveEntry = { + collection: C; + id: string; + }; + + /** @deprecated Use `getEntry` instead. */ + export function getEntryBySlug< + C extends keyof ContentEntryMap, + E extends ValidContentEntrySlug | (string & {}), + >( + collection: C, + // Note that this has to accept a regular string too, for SSR + entrySlug: E, + ): E extends ValidContentEntrySlug + ? Promise> + : Promise | undefined>; + + /** @deprecated Use `getEntry` instead. */ + export function getDataEntryById( + collection: C, + entryId: E, + ): Promise>; + + export function getCollection>( + collection: C, + filter?: (entry: CollectionEntry) => entry is E, + ): Promise; + export function getCollection( + collection: C, + filter?: (entry: CollectionEntry) => unknown, + ): Promise[]>; + + export function getLiveCollection( + collection: C, + filter?: LiveLoaderCollectionFilterType, + ): Promise< + import('astro').LiveDataCollectionResult, LiveLoaderErrorType> + >; + + export function getEntry< + C extends keyof ContentEntryMap, + E extends ValidContentEntrySlug | (string & {}), + >( + entry: ReferenceContentEntry, + ): E extends ValidContentEntrySlug + ? 
Promise> + : Promise | undefined>; + export function getEntry< + C extends keyof DataEntryMap, + E extends keyof DataEntryMap[C] | (string & {}), + >( + entry: ReferenceDataEntry, + ): E extends keyof DataEntryMap[C] + ? Promise + : Promise | undefined>; + export function getEntry< + C extends keyof ContentEntryMap, + E extends ValidContentEntrySlug | (string & {}), + >( + collection: C, + slug: E, + ): E extends ValidContentEntrySlug + ? Promise> + : Promise | undefined>; + export function getEntry< + C extends keyof DataEntryMap, + E extends keyof DataEntryMap[C] | (string & {}), + >( + collection: C, + id: E, + ): E extends keyof DataEntryMap[C] + ? string extends keyof DataEntryMap[C] + ? Promise | undefined + : Promise + : Promise | undefined>; + export function getLiveEntry( + collection: C, + filter: string | LiveLoaderEntryFilterType, + ): Promise, LiveLoaderErrorType>>; + + /** Resolve an array of entry references from the same collection */ + export function getEntries( + entries: ReferenceContentEntry>[], + ): Promise[]>; + export function getEntries( + entries: ReferenceDataEntry[], + ): Promise[]>; + + export function render( + entry: AnyEntryMap[C][string], + ): Promise; + + export function reference( + collection: C, + ): import('astro/zod').ZodEffects< + import('astro/zod').ZodString, + C extends keyof ContentEntryMap + ? ReferenceContentEntry> + : ReferenceDataEntry + >; + // Allow generic `string` to avoid excessive type errors in the config + // if `dev` is not running to update as you edit. + // Invalid collection names will be caught at build time. + export function reference( + collection: C, + ): import('astro/zod').ZodEffects; + + type ReturnTypeOrOriginal = T extends (...args: any[]) => infer R ? 
R : T; + type InferEntrySchema = import('astro/zod').infer< + ReturnTypeOrOriginal['schema']> + >; + + type ContentEntryMap = { + + }; + + type DataEntryMap = { + "changelogs": Record; +"docs": Record; + rendered?: RenderedContent; + filePath?: string; +}>; + + }; + + type AnyEntryMap = ContentEntryMap & DataEntryMap; + + type ExtractLoaderTypes = T extends import('astro/loaders').LiveLoader< + infer TData, + infer TEntryFilter, + infer TCollectionFilter, + infer TError + > + ? { data: TData; entryFilter: TEntryFilter; collectionFilter: TCollectionFilter; error: TError } + : { data: never; entryFilter: never; collectionFilter: never; error: never }; + type ExtractDataType = ExtractLoaderTypes['data']; + type ExtractEntryFilterType = ExtractLoaderTypes['entryFilter']; + type ExtractCollectionFilterType = ExtractLoaderTypes['collectionFilter']; + type ExtractErrorType = ExtractLoaderTypes['error']; + + type LiveLoaderDataType = + LiveContentConfig['collections'][C]['schema'] extends undefined + ? 
ExtractDataType + : import('astro/zod').infer< + Exclude + >; + type LiveLoaderEntryFilterType = + ExtractEntryFilterType; + type LiveLoaderCollectionFilterType = + ExtractCollectionFilterType; + type LiveLoaderErrorType = ExtractErrorType< + LiveContentConfig['collections'][C]['loader'] + >; + + export type ContentConfig = typeof import("../content.config.js"); + export type LiveContentConfig = never; +} diff --git a/website/.astro/data-store.json b/website/.astro/data-store.json new file mode 100644 index 0000000..ac678b5 --- /dev/null +++ b/website/.astro/data-store.json @@ -0,0 +1 @@ +[["Map",1,2,9,10,936,937],"meta::meta",["Map",3,4,5,6,7,8],"astro-version","5.14.1","content-config-digest","d2702aee2cb0314c","astro-config-digest","{\"root\":{},\"srcDir\":{},\"publicDir\":{},\"outDir\":{},\"cacheDir\":{},\"site\":\"https://python.fairspec.org\",\"compressHTML\":true,\"base\":\"/\",\"trailingSlash\":\"ignore\",\"output\":\"static\",\"scopedStyleStrategy\":\"where\",\"build\":{\"format\":\"directory\",\"client\":{},\"server\":{},\"assets\":\"_astro\",\"serverEntry\":\"entry.mjs\",\"redirects\":true,\"inlineStylesheets\":\"auto\",\"concurrency\":1},\"server\":{\"open\":false,\"host\":false,\"port\":5000,\"streaming\":true,\"allowedHosts\":[]},\"redirects\":{},\"image\":{\"endpoint\":{\"route\":\"/_image\"},\"service\":{\"entrypoint\":\"astro/assets/services/sharp\",\"config\":{}},\"domains\":[],\"remotePatterns\":[],\"responsiveStyles\":false},\"devToolbar\":{\"enabled\":true},\"markdown\":{\"syntaxHighlight\":false,\"shikiConfig\":{\"langs\":[],\"langAlias\":{},\"theme\":\"github-dark\",\"themes\":{},\"wrap\":false,\"transformers\":[]},\"remarkPlugins\":[[null,{\"config\":{\"types\":{\"caution\":\"danger\",\"important\":\"caution\",\"note\":\"note\",\"tip\":\"tip\",\"warning\":\"caution\"}},\"docsCollectionPath\":\"/home/roll/projects/fairspec-python/website/content/docs/\"}],null,null,null],\"rehypePlugins\":[null,[null,{\"experimentalHeadingIdCompat\":false}],nu
ll,[null,{\"themes\":[{\"name\":\"Night Owl No Italics\",\"type\":\"dark\",\"colors\":{\"focusBorder\":\"#122d42\",\"foreground\":\"#d6deeb\",\"disabledForeground\":\"#cccccc80\",\"descriptionForeground\":\"#d6deebb3\",\"errorForeground\":\"#ef5350\",\"icon.foreground\":\"#c5c5c5\",\"contrastActiveBorder\":null,\"contrastBorder\":\"#122d42\",\"textBlockQuote.background\":\"#7f7f7f1a\",\"textBlockQuote.border\":\"#007acc80\",\"textCodeBlock.background\":\"#4f4f4f\",\"textLink.activeForeground\":\"#3794ff\",\"textLink.foreground\":\"#3794ff\",\"textPreformat.foreground\":\"#d7ba7d\",\"textSeparator.foreground\":\"#ffffff2e\",\"editor.background\":\"#011627\",\"editor.foreground\":\"#d6deeb\",\"editorLineNumber.foreground\":\"#4b6479\",\"editorLineNumber.activeForeground\":\"#c5e4fd\",\"editorActiveLineNumber.foreground\":\"#c6c6c6\",\"editor.selectionBackground\":\"#1d3b53\",\"editor.inactiveSelectionBackground\":\"#7e57c25a\",\"editor.selectionHighlightBackground\":\"#5f7e9779\",\"editorError.foreground\":\"#ef5350\",\"editorWarning.foreground\":\"#b39554\",\"editorInfo.foreground\":\"#3794ff\",\"editorHint.foreground\":\"#eeeeeeb2\",\"problemsErrorIcon.foreground\":\"#ef5350\",\"problemsWarningIcon.foreground\":\"#b39554\",\"problemsInfoIcon.foreground\":\"#3794ff\",\"editor.findMatchBackground\":\"#5f7e9779\",\"editor.findMatchHighlightBackground\":\"#1085bb5d\",\"editor.findRangeHighlightBackground\":\"#3a3d4166\",\"editorLink.activeForeground\":\"#4e94ce\",\"editorLightBulb.foreground\":\"#ffcc00\",\"editorLightBulbAutoFix.foreground\":\"#75beff\",\"diffEditor.insertedTextBackground\":\"#99b76d23\",\"diffEditor.insertedTextBorder\":\"#c5e47833\",\"diffEditor.removedTextBackground\":\"#ef535033\",\"diffEditor.removedTextBorder\":\"#ef53504d\",\"diffEditor.insertedLineBackground\":\"#9bb95533\",\"diffEditor.removedLineBackground\":\"#ff000033\",\"editorStickyScroll.background\":\"#011627\",\"editorStickyScrollHover.background\":\"#2a2d2e\",\"editorInlayHint.backgro
und\":\"#5f7e97cc\",\"editorInlayHint.foreground\":\"#ffffff\",\"editorInlayHint.typeBackground\":\"#5f7e97cc\",\"editorInlayHint.typeForeground\":\"#ffffff\",\"editorInlayHint.parameterBackground\":\"#5f7e97cc\",\"editorInlayHint.parameterForeground\":\"#ffffff\",\"editorPane.background\":\"#011627\",\"editorGroup.emptyBackground\":\"#011627\",\"editorGroup.focusedEmptyBorder\":null,\"editorGroupHeader.tabsBackground\":\"#011627\",\"editorGroupHeader.tabsBorder\":\"#0b2942\",\"editorGroupHeader.noTabsBackground\":\"#011627\",\"editorGroupHeader.border\":null,\"editorGroup.border\":\"#011627\",\"editorGroup.dropBackground\":\"#7e57c273\",\"editorGroup.dropIntoPromptForeground\":\"#d6deeb\",\"editorGroup.dropIntoPromptBackground\":\"#021320\",\"editorGroup.dropIntoPromptBorder\":null,\"sideBySideEditor.horizontalBorder\":\"#011627\",\"sideBySideEditor.verticalBorder\":\"#011627\",\"scrollbar.shadow\":\"#010b14\",\"scrollbarSlider.background\":\"#084d8180\",\"scrollbarSlider.hoverBackground\":\"#084d8180\",\"scrollbarSlider.activeBackground\":\"#084d8180\",\"panel.background\":\"#011627\",\"panel.border\":\"#5f7e97\",\"panelTitle.activeBorder\":\"#5f7e97\",\"panelTitle.activeForeground\":\"#ffffffcc\",\"panelTitle.inactiveForeground\":\"#d6deeb80\",\"panelSectionHeader.background\":\"#80808051\",\"terminal.background\":\"#011627\",\"widget.shadow\":\"#011627\",\"editorWidget.background\":\"#021320\",\"editorWidget.foreground\":\"#d6deeb\",\"editorWidget.border\":\"#5f7e97\",\"quickInput.background\":\"#021320\",\"quickInput.foreground\":\"#d6deeb\",\"quickInputTitle.background\":\"#ffffff1a\",\"pickerGroup.foreground\":\"#d1aaff\",\"pickerGroup.border\":\"#011627\",\"editor.hoverHighlightBackground\":\"#7e57c25a\",\"editorHoverWidget.background\":\"#011627\",\"editorHoverWidget.foreground\":\"#d6deeb\",\"editorHoverWidget.border\":\"#5f7e97\",\"editorHoverWidget.statusBarBackground\":\"#011a2f\",\"titleBar.activeBackground\":\"#011627\",\"titleBar.activeForeground\":\
"#eeefff\",\"titleBar.inactiveBackground\":\"#010e1a\",\"titleBar.inactiveForeground\":\"#eeefff99\",\"titleBar.border\":\"#0b2942\",\"toolbar.hoverBackground\":\"#5a5d5e50\",\"toolbar.activeBackground\":\"#63666750\",\"tab.activeBackground\":\"#0b2942\",\"tab.unfocusedActiveBackground\":\"#0b2942\",\"tab.inactiveBackground\":\"#01111d\",\"tab.unfocusedInactiveBackground\":\"#01111d\",\"tab.activeForeground\":\"#d2dee7\",\"tab.inactiveForeground\":\"#5f7e97\",\"tab.unfocusedActiveForeground\":\"#5f7e97\",\"tab.unfocusedInactiveForeground\":\"#5f7e97\",\"tab.hoverBackground\":null,\"tab.unfocusedHoverBackground\":null,\"tab.hoverForeground\":null,\"tab.unfocusedHoverForeground\":null,\"tab.border\":\"#272b3b\",\"tab.lastPinnedBorder\":\"#585858\",\"tab.activeBorder\":\"#262a39\",\"tab.unfocusedActiveBorder\":\"#262a39\",\"tab.activeBorderTop\":null,\"tab.unfocusedActiveBorderTop\":null,\"tab.hoverBorder\":null,\"tab.unfocusedHoverBorder\":null,\"tab.activeModifiedBorder\":\"#3399cc\",\"tab.inactiveModifiedBorder\":\"#3399cc80\",\"tab.unfocusedActiveModifiedBorder\":\"#3399cc80\",\"tab.unfocusedInactiveModifiedBorder\":\"#3399cc40\",\"badge.background\":\"#5f7e97\",\"badge.foreground\":\"#ffffff\",\"button.background\":\"#7e57c2cc\",\"button.foreground\":\"#ffffffcc\",\"button.border\":\"#122d42\",\"button.separator\":\"#ffffff52\",\"button.hoverBackground\":\"#7e57c2\",\"button.secondaryBackground\":\"#3a3d41\",\"button.secondaryForeground\":\"#ffffff\",\"button.secondaryHoverBackground\":\"#46494e\",\"dropdown.background\":\"#011627\",\"dropdown.foreground\":\"#ffffffcc\",\"dropdown.border\":\"#5f7e97\",\"list.activeSelectionBackground\":\"#234d708c\",\"list.activeSelectionForeground\":\"#ffffff\",\"tree.indentGuidesStroke\":\"#585858\",\"input.background\":\"#0b253a\",\"input.foreground\":\"#ffffffcc\",\"input.placeholderForeground\":\"#5f7e97\",\"inputOption.activeBorder\":\"#ffffffcc\",\"inputOption.hoverBackground\":\"#5a5d5e80\",\"inputOption.activeBackground\"
:\"#122d4266\",\"inputOption.activeForeground\":\"#ffffff\",\"inputValidation.infoBackground\":\"#00589ef2\",\"inputValidation.infoBorder\":\"#64b5f6\",\"inputValidation.warningBackground\":\"#675700f2\",\"inputValidation.warningBorder\":\"#ffca28\",\"inputValidation.errorBackground\":\"#ab0300f2\",\"inputValidation.errorBorder\":\"#ef5350\",\"keybindingLabel.background\":\"#8080802b\",\"keybindingLabel.foreground\":\"#cccccc\",\"keybindingLabel.border\":\"#33333399\",\"keybindingLabel.bottomBorder\":\"#44444499\",\"menu.foreground\":\"#ffffffcc\",\"menu.background\":\"#011627\",\"menu.selectionForeground\":\"#ffffff\",\"menu.selectionBackground\":\"#234d708c\",\"menu.separatorBackground\":\"#606060\",\"editor.snippetTabstopHighlightBackground\":\"#7c7c74c\",\"editor.snippetFinalTabstopHighlightBorder\":\"#525252\",\"terminal.ansiBlack\":\"#011627\",\"terminal.ansiRed\":\"#ef5350\",\"terminal.ansiGreen\":\"#22da6e\",\"terminal.ansiYellow\":\"#c5e478\",\"terminal.ansiBlue\":\"#82aaff\",\"terminal.ansiMagenta\":\"#c792ea\",\"terminal.ansiCyan\":\"#21c7a8\",\"terminal.ansiWhite\":\"#ffffff\",\"terminal.ansiBrightBlack\":\"#575656\",\"terminal.ansiBrightRed\":\"#ef5350\",\"terminal.ansiBrightGreen\":\"#22da6e\",\"terminal.ansiBrightYellow\":\"#ffeb95\",\"terminal.ansiBrightBlue\":\"#82aaff\",\"terminal.ansiBrightMagenta\":\"#c792ea\",\"terminal.ansiBrightCyan\":\"#7fdbca\",\"terminal.ansiBrightWhite\":\"#ffffff\",\"selection.background\":\"#4373c2\",\"input.border\":\"#5f7e97\",\"punctuation.definition.generic.begin.html\":\"#ef5350f2\",\"progress.background\":\"#7e57c2\",\"breadcrumb.foreground\":\"#a599e9\",\"breadcrumb.focusForeground\":\"#ffffff\",\"breadcrumb.activeSelectionForeground\":\"#ffffff\",\"breadcrumbPicker.background\":\"#001122\",\"list.invalidItemForeground\":\"#975f94\",\"list.dropBackground\":\"#011627\",\"list.focusBackground\":\"#010d18\",\"list.focusForeground\":\"#ffffff\",\"list.highlightForeground\":\"#ffffff\",\"list.hoverBackground\":\"#01162
7\",\"list.hoverForeground\":\"#ffffff\",\"list.inactiveSelectionBackground\":\"#0e293f\",\"list.inactiveSelectionForeground\":\"#5f7e97\",\"activityBar.background\":\"#011627\",\"activityBar.dropBackground\":\"#5f7e97\",\"activityBar.foreground\":\"#5f7e97\",\"activityBar.border\":\"#011627\",\"activityBarBadge.background\":\"#44596b\",\"activityBarBadge.foreground\":\"#ffffff\",\"sideBar.background\":\"#011627\",\"sideBar.foreground\":\"#89a4bb\",\"sideBar.border\":\"#011627\",\"sideBarTitle.foreground\":\"#5f7e97\",\"sideBarSectionHeader.background\":\"#011627\",\"sideBarSectionHeader.foreground\":\"#5f7e97\",\"editorCursor.foreground\":\"#80a4c2\",\"editor.wordHighlightBackground\":\"#f6bbe533\",\"editor.wordHighlightStrongBackground\":\"#e2a2f433\",\"editor.lineHighlightBackground\":\"#0003\",\"editor.rangeHighlightBackground\":\"#7e57c25a\",\"editorIndentGuide.background\":\"#5e81ce52\",\"editorIndentGuide.activeBackground\":\"#7e97ac\",\"editorRuler.foreground\":\"#5e81ce52\",\"editorCodeLens.foreground\":\"#5e82ceb4\",\"editorBracketMatch.background\":\"#5f7e974d\",\"editorOverviewRuler.currentContentForeground\":\"#7e57c2\",\"editorOverviewRuler.incomingContentForeground\":\"#7e57c2\",\"editorOverviewRuler.commonContentForeground\":\"#7e57c2\",\"editorGutter.background\":\"#011627\",\"editorGutter.modifiedBackground\":\"#e2b93d\",\"editorGutter.addedBackground\":\"#9ccc65\",\"editorGutter.deletedBackground\":\"#ef5350\",\"editorSuggestWidget.background\":\"#2c3043\",\"editorSuggestWidget.border\":\"#2b2f40\",\"editorSuggestWidget.foreground\":\"#d6deeb\",\"editorSuggestWidget.highlightForeground\":\"#ffffff\",\"editorSuggestWidget.selectedBackground\":\"#5f7e97\",\"debugExceptionWidget.background\":\"#011627\",\"debugExceptionWidget.border\":\"#5f7e97\",\"editorMarkerNavigation.background\":\"#0b2942\",\"editorMarkerNavigationError.background\":\"#ef5350\",\"editorMarkerNavigationWarning.background\":\"#ffca28\",\"peekView.border\":\"#5f7e97\",\"peekViewEdi
tor.background\":\"#011627\",\"peekViewEditor.matchHighlightBackground\":\"#7e57c25a\",\"peekViewResult.background\":\"#011627\",\"peekViewResult.fileForeground\":\"#5f7e97\",\"peekViewResult.lineForeground\":\"#5f7e97\",\"peekViewResult.matchHighlightBackground\":\"#ffffffcc\",\"peekViewResult.selectionBackground\":\"#2e3250\",\"peekViewResult.selectionForeground\":\"#5f7e97\",\"peekViewTitle.background\":\"#011627\",\"peekViewTitleDescription.foreground\":\"#697098\",\"peekViewTitleLabel.foreground\":\"#5f7e97\",\"merge.currentHeaderBackground\":\"#5f7e97\",\"merge.incomingHeaderBackground\":\"#7e57c25a\",\"statusBar.background\":\"#011627\",\"statusBar.foreground\":\"#5f7e97\",\"statusBar.border\":\"#262a39\",\"statusBar.debuggingBackground\":\"#202431\",\"statusBar.debuggingBorder\":\"#1f2330\",\"statusBar.noFolderBackground\":\"#011627\",\"statusBar.noFolderBorder\":\"#25293a\",\"statusBarItem.activeBackground\":\"#202431\",\"statusBarItem.hoverBackground\":\"#202431\",\"statusBarItem.prominentBackground\":\"#202431\",\"statusBarItem.prominentHoverBackground\":\"#202431\",\"notifications.background\":\"#01111d\",\"notifications.border\":\"#262a39\",\"notificationCenter.border\":\"#262a39\",\"notificationToast.border\":\"#262a39\",\"notifications.foreground\":\"#ffffffcc\",\"notificationLink.foreground\":\"#80cbc4\",\"extensionButton.prominentForeground\":\"#ffffffcc\",\"extensionButton.prominentBackground\":\"#7e57c2cc\",\"extensionButton.prominentHoverBackground\":\"#7e57c2\",\"terminal.selectionBackground\":\"#1b90dd4d\",\"terminalCursor.background\":\"#234d70\",\"debugToolBar.background\":\"#011627\",\"welcomePage.buttonBackground\":\"#011627\",\"welcomePage.buttonHoverBackground\":\"#011627\",\"walkThrough.embeddedEditorBackground\":\"#011627\",\"gitDecoration.modifiedResourceForeground\":\"#a2bffc\",\"gitDecoration.deletedResourceForeground\":\"#ef535090\",\"gitDecoration.untrackedResourceForeground\":\"#c5e478ff\",\"gitDecoration.ignoredResourceForeground
\":\"#395a75\",\"gitDecoration.conflictingResourceForeground\":\"#ffeb95cc\",\"source.elm\":\"#5f7e97\",\"string.quoted.single.js\":\"#ffffff\",\"meta.objectliteral.js\":\"#82aaff\"},\"fg\":\"#d6deeb\",\"bg\":\"#011627\",\"semanticHighlighting\":false,\"settings\":[{\"name\":\"Changed\",\"scope\":[\"markup.changed\",\"meta.diff.header.git\",\"meta.diff.header.from-file\",\"meta.diff.header.to-file\"],\"settings\":{\"foreground\":\"#a2bffc\"}},{\"name\":\"Deleted\",\"scope\":[\"markup.deleted.diff\"],\"settings\":{\"foreground\":\"#f05b58fe\"}},{\"name\":\"Inserted\",\"scope\":[\"markup.inserted.diff\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Global settings\",\"settings\":{\"background\":\"#011627\",\"foreground\":\"#d6deeb\"}},{\"name\":\"Comment\",\"scope\":[\"comment\"],\"settings\":{\"foreground\":\"#809191\",\"fontStyle\":\"\"}},{\"name\":\"String\",\"scope\":[\"string\"],\"settings\":{\"foreground\":\"#ecc48d\"}},{\"name\":\"String Quoted\",\"scope\":[\"string.quoted\",\"variable.other.readwrite.js\"],\"settings\":{\"foreground\":\"#ecc48d\"}},{\"name\":\"Support Constant Math\",\"scope\":[\"support.constant.math\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Number\",\"scope\":[\"constant.numeric\",\"constant.character.numeric\"],\"settings\":{\"foreground\":\"#f78c6c\",\"fontStyle\":\"\"}},{\"name\":\"Built-in constant\",\"scope\":[\"constant.language\",\"punctuation.definition.constant\",\"variable.other.constant\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"User-defined constant\",\"scope\":[\"constant.character\",\"constant.other\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Constant Character Escape\",\"scope\":[\"constant.character.escape\"],\"settings\":{\"foreground\":\"#f78c6c\"}},{\"name\":\"RegExp String\",\"scope\":[\"string.regexp\",\"string.regexp keyword.other\"],\"settings\":{\"foreground\":\"#5ca7e4\"}},{\"name\":\"Comma in functions\",\"scope\":[\"meta.function 
punctuation.separator.comma\"],\"settings\":{\"foreground\":\"#7690a6\"}},{\"name\":\"Variable\",\"scope\":[\"variable\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Keyword\",\"scope\":[\"punctuation.accessor\",\"keyword\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Storage\",\"scope\":[\"storage\",\"meta.var.expr\",\"meta.class meta.method.declaration meta.var.expr storage.type.js\",\"storage.type.property.js\",\"storage.type.property.ts\",\"storage.type.property.tsx\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Storage type\",\"scope\":[\"storage.type\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Storage type\",\"scope\":[\"storage.type.function.arrow.js\"],\"settings\":{\"fontStyle\":\"\"}},{\"name\":\"Class name\",\"scope\":[\"entity.name.class\",\"meta.class entity.name.type.class\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"Inherited class\",\"scope\":[\"entity.other.inherited-class\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Function name\",\"scope\":[\"entity.name.function\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Meta Tag\",\"scope\":[\"punctuation.definition.tag\",\"meta.tag\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"HTML Tag names\",\"scope\":[\"entity.name.tag\",\"meta.tag.other.html\",\"meta.tag.other.js\",\"meta.tag.other.tsx\",\"entity.name.tag.tsx\",\"entity.name.tag.js\",\"entity.name.tag\",\"meta.tag.js\",\"meta.tag.tsx\",\"meta.tag.html\"],\"settings\":{\"foreground\":\"#caece6\",\"fontStyle\":\"\"}},{\"name\":\"Tag attribute\",\"scope\":[\"entity.other.attribute-name\"],\"settings\":{\"fontStyle\":\"\",\"foreground\":\"#c5e478\"}},{\"name\":\"Entity Name Tag Custom\",\"scope\":[\"entity.name.tag.custom\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Library (function & 
constant)\",\"scope\":[\"support.function\",\"support.constant\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Support Constant Property Value meta\",\"scope\":[\"support.constant.meta.property-value\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Library class/type\",\"scope\":[\"support.type\",\"support.class\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Support Variable DOM\",\"scope\":[\"support.variable.dom\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Invalid\",\"scope\":[\"invalid\"],\"settings\":{\"background\":\"#ff2c83\",\"foreground\":\"#ffffff\"}},{\"name\":\"Invalid deprecated\",\"scope\":[\"invalid.deprecated\"],\"settings\":{\"foreground\":\"#ffffff\",\"background\":\"#d3423e\"}},{\"name\":\"Keyword Operator\",\"scope\":[\"keyword.operator\"],\"settings\":{\"foreground\":\"#7fdbca\",\"fontStyle\":\"\"}},{\"name\":\"Keyword Operator Relational\",\"scope\":[\"keyword.operator.relational\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Keyword Operator Assignment\",\"scope\":[\"keyword.operator.assignment\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Keyword Operator Arithmetic\",\"scope\":[\"keyword.operator.arithmetic\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Keyword Operator Bitwise\",\"scope\":[\"keyword.operator.bitwise\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Keyword Operator Increment\",\"scope\":[\"keyword.operator.increment\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Keyword Operator Ternary\",\"scope\":[\"keyword.operator.ternary\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Double-Slashed Comment\",\"scope\":[\"comment.line.double-slash\"],\"settings\":{\"foreground\":\"#809191\"}},{\"name\":\"Object\",\"scope\":[\"object\"],\"settings\":{\"foreground\":\"#cdebf7\"}},{\"name\":\"Null\",\"scope\":[\"constant.language.null\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"Meta 
Brace\",\"scope\":[\"meta.brace\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"Meta Delimiter Period\",\"scope\":[\"meta.delimiter.period\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Punctuation Definition String\",\"scope\":[\"punctuation.definition.string\"],\"settings\":{\"foreground\":\"#d9f5dd\"}},{\"name\":\"Punctuation Definition String Markdown\",\"scope\":[\"punctuation.definition.string.begin.markdown\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"Boolean\",\"scope\":[\"constant.language.boolean\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"Object Comma\",\"scope\":[\"object.comma\"],\"settings\":{\"foreground\":\"#ffffff\"}},{\"name\":\"Variable Parameter Function\",\"scope\":[\"variable.parameter.function\"],\"settings\":{\"foreground\":\"#7fdbca\",\"fontStyle\":\"\"}},{\"name\":\"Support Type Property Name & entity name tags\",\"scope\":[\"support.type.vendor.property-name\",\"support.constant.vendor.property-value\",\"support.type.property-name\",\"meta.property-list entity.name.tag\"],\"settings\":{\"foreground\":\"#80cbc4\",\"fontStyle\":\"\"}},{\"name\":\"Entity Name tag reference in stylesheets\",\"scope\":[\"meta.property-list entity.name.tag.reference\"],\"settings\":{\"foreground\":\"#57eaf1\"}},{\"name\":\"Constant Other Color RGB Value Punctuation Definition Constant\",\"scope\":[\"constant.other.color.rgb-value punctuation.definition.constant\"],\"settings\":{\"foreground\":\"#f78c6c\"}},{\"name\":\"Constant Other Color\",\"scope\":[\"constant.other.color\"],\"settings\":{\"foreground\":\"#ffeb95\"}},{\"name\":\"Keyword Other Unit\",\"scope\":[\"keyword.other.unit\"],\"settings\":{\"foreground\":\"#ffeb95\"}},{\"name\":\"Meta Selector\",\"scope\":[\"meta.selector\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Entity Other Attribute Name Id\",\"scope\":[\"entity.other.attribute-name.id\"],\"settings\":{\"foreground\":\"#fad430\"}},{\"name\":\"Meta 
Property Name\",\"scope\":[\"meta.property-name\"],\"settings\":{\"foreground\":\"#80cbc4\"}},{\"name\":\"Doctypes\",\"scope\":[\"entity.name.tag.doctype\",\"meta.tag.sgml.doctype\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Punctuation Definition Parameters\",\"scope\":[\"punctuation.definition.parameters\"],\"settings\":{\"foreground\":\"#d9f5dd\"}},{\"name\":\"Keyword Control Operator\",\"scope\":[\"keyword.control.operator\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Keyword Operator Logical\",\"scope\":[\"keyword.operator.logical\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Variable Instances\",\"scope\":[\"variable.instance\",\"variable.other.instance\",\"variable.readwrite.instance\",\"variable.other.readwrite.instance\",\"variable.other.property\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Variable Property Other object property\",\"scope\":[\"variable.other.object.property\"],\"settings\":{\"foreground\":\"#faf39f\",\"fontStyle\":\"\"}},{\"name\":\"Variable Property Other object\",\"scope\":[\"variable.other.object.js\"],\"settings\":{\"fontStyle\":\"\"}},{\"name\":\"Entity Name Function\",\"scope\":[\"entity.name.function\"],\"settings\":{\"foreground\":\"#82aaff\",\"fontStyle\":\"\"}},{\"name\":\"Keyword Operator Comparison, returns, imports, and Keyword Operator 
Ruby\",\"scope\":[\"keyword.control.conditional.js\",\"keyword.operator.comparison\",\"keyword.control.flow.js\",\"keyword.control.flow.ts\",\"keyword.control.flow.tsx\",\"keyword.control.ruby\",\"keyword.control.def.ruby\",\"keyword.control.loop.js\",\"keyword.control.loop.ts\",\"keyword.control.import.js\",\"keyword.control.import.ts\",\"keyword.control.import.tsx\",\"keyword.control.from.js\",\"keyword.control.from.ts\",\"keyword.control.from.tsx\",\"keyword.control.conditional.js\",\"keyword.control.conditional.ts\",\"keyword.control.switch.js\",\"keyword.control.switch.ts\",\"keyword.operator.instanceof.js\",\"keyword.operator.expression.instanceof.ts\",\"keyword.operator.expression.instanceof.tsx\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Support Constant, `new` keyword, Special Method Keyword, `debugger`, other keywords\",\"scope\":[\"support.constant\",\"keyword.other.special-method\",\"keyword.other.new\",\"keyword.other.debugger\",\"keyword.control\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Support Function\",\"scope\":[\"support.function\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Invalid Broken\",\"scope\":[\"invalid.broken\"],\"settings\":{\"foreground\":\"#888e91\",\"background\":\"#F78C6C\"}},{\"name\":\"Invalid Unimplemented\",\"scope\":[\"invalid.unimplemented\"],\"settings\":{\"background\":\"#8BD649\",\"foreground\":\"#ffffff\"}},{\"name\":\"Invalid Illegal\",\"scope\":[\"invalid.illegal\"],\"settings\":{\"foreground\":\"#ffffff\",\"background\":\"#ec5f67\"}},{\"name\":\"Language Variable\",\"scope\":[\"variable.language\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Support Variable Property\",\"scope\":[\"support.variable.property\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Variable Function\",\"scope\":[\"variable.function\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Variable 
Interpolation\",\"scope\":[\"variable.interpolation\"],\"settings\":{\"foreground\":\"#ec5f67\"}},{\"name\":\"Meta Function Call\",\"scope\":[\"meta.function-call\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Punctuation Section Embedded\",\"scope\":[\"punctuation.section.embedded\"],\"settings\":{\"foreground\":\"#dd6b68\"}},{\"name\":\"Punctuation Tweaks\",\"scope\":[\"punctuation.terminator.expression\",\"punctuation.definition.arguments\",\"punctuation.definition.array\",\"punctuation.section.array\",\"meta.array\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"More Punctuation Tweaks\",\"scope\":[\"punctuation.definition.list.begin\",\"punctuation.definition.list.end\",\"punctuation.separator.arguments\",\"punctuation.definition.list\"],\"settings\":{\"foreground\":\"#d9f5dd\"}},{\"name\":\"Template Strings\",\"scope\":[\"string.template meta.template.expression\"],\"settings\":{\"foreground\":\"#dd6b68\"}},{\"name\":\"Backtics(``) in Template Strings\",\"scope\":[\"string.template punctuation.definition.string\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"Italics\",\"scope\":[\"italic\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"italic\"}},{\"name\":\"Bold\",\"scope\":[\"bold\"],\"settings\":{\"foreground\":\"#c5e478\",\"fontStyle\":\"bold\"}},{\"name\":\"Quote\",\"scope\":[\"quote\"],\"settings\":{\"foreground\":\"#868cac\",\"fontStyle\":\"\"}},{\"name\":\"Raw Code\",\"scope\":[\"raw\"],\"settings\":{\"foreground\":\"#80cbc4\"}},{\"name\":\"CoffeScript Variable Assignment\",\"scope\":[\"variable.assignment.coffee\"],\"settings\":{\"foreground\":\"#31e1eb\"}},{\"name\":\"CoffeScript Parameter Function\",\"scope\":[\"variable.parameter.function.coffee\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"CoffeeScript Assignments\",\"scope\":[\"variable.assignment.coffee\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"C# Readwrite 
Variables\",\"scope\":[\"variable.other.readwrite.cs\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"C# Classes & Storage types\",\"scope\":[\"entity.name.type.class.cs\",\"storage.type.cs\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"C# Namespaces\",\"scope\":[\"entity.name.type.namespace.cs\"],\"settings\":{\"foreground\":\"#b2ccd6\"}},{\"name\":\"C# Unquoted String Zone\",\"scope\":[\"string.unquoted.preprocessor.message.cs\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"C# Region\",\"scope\":[\"punctuation.separator.hash.cs\",\"keyword.preprocessor.region.cs\",\"keyword.preprocessor.endregion.cs\"],\"settings\":{\"foreground\":\"#ffcb8b\",\"fontStyle\":\"bold\"}},{\"name\":\"C# Other Variables\",\"scope\":[\"variable.other.object.cs\"],\"settings\":{\"foreground\":\"#b2ccd6\"}},{\"name\":\"C# Enum\",\"scope\":[\"entity.name.type.enum.cs\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Dart String\",\"scope\":[\"string.interpolated.single.dart\",\"string.interpolated.double.dart\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"Dart Class\",\"scope\":[\"support.class.dart\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"Tag names in Stylesheets\",\"scope\":[\"entity.name.tag.css\",\"entity.name.tag.less\",\"entity.name.tag.custom.css\",\"support.constant.property-value.css\"],\"settings\":{\"foreground\":\"#ff6363\",\"fontStyle\":\"\"}},{\"name\":\"Wildcard(*) selector in Stylesheets\",\"scope\":[\"entity.name.tag.wildcard.css\",\"entity.name.tag.wildcard.less\",\"entity.name.tag.wildcard.scss\",\"entity.name.tag.wildcard.sass\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"CSS Keyword Other Unit\",\"scope\":[\"keyword.other.unit.css\"],\"settings\":{\"foreground\":\"#ffeb95\"}},{\"name\":\"Attribute Name for CSS\",\"scope\":[\"meta.attribute-selector.css entity.other.attribute-name.attribute\",\"variable.other.readwrite.js\"],\"settings\":{\"foreground\":\"#f78c6c\"}},{\"name\":\"Elixir 
Classes\",\"scope\":[\"source.elixir support.type.elixir\",\"source.elixir meta.module.elixir entity.name.class.elixir\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Elixir Functions\",\"scope\":[\"source.elixir entity.name.function\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Elixir Constants\",\"scope\":[\"source.elixir constant.other.symbol.elixir\",\"source.elixir constant.other.keywords.elixir\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Elixir String Punctuations\",\"scope\":[\"source.elixir punctuation.definition.string\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Elixir\",\"scope\":[\"source.elixir variable.other.readwrite.module.elixir\",\"source.elixir variable.other.readwrite.module.elixir punctuation.definition.variable.elixir\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Elixir Binary Punctuations\",\"scope\":[\"source.elixir .punctuation.binary.elixir\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"Closure Constant Keyword\",\"scope\":[\"constant.keyword.clojure\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Go Function Calls\",\"scope\":[\"source.go meta.function-call.go\"],\"settings\":{\"foreground\":\"#dddddd\"}},{\"name\":\"Go Keywords\",\"scope\":[\"source.go keyword.package.go\",\"source.go keyword.import.go\",\"source.go keyword.function.go\",\"source.go keyword.type.go\",\"source.go keyword.struct.go\",\"source.go keyword.interface.go\",\"source.go keyword.const.go\",\"source.go keyword.var.go\",\"source.go keyword.map.go\",\"source.go keyword.channel.go\",\"source.go keyword.control.go\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"Go Constants e.g. 
nil, string format (%s, %d, etc.)\",\"scope\":[\"source.go constant.language.go\",\"source.go constant.other.placeholder.go\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"C++ Functions\",\"scope\":[\"entity.name.function.preprocessor.cpp\",\"entity.scope.name.cpp\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"C++ Meta Namespace\",\"scope\":[\"meta.namespace-block.cpp\"],\"settings\":{\"foreground\":\"#e0dec6\"}},{\"name\":\"C++ Language Primitive Storage\",\"scope\":[\"storage.type.language.primitive.cpp\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"C++ Preprocessor Macro\",\"scope\":[\"meta.preprocessor.macro.cpp\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"C++ Variable Parameter\",\"scope\":[\"variable.parameter\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"Powershell Variables\",\"scope\":[\"variable.other.readwrite.powershell\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Powershell Function\",\"scope\":[\"support.function.powershell\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"ID Attribute Name in HTML\",\"scope\":[\"entity.other.attribute-name.id.html\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"HTML Punctuation Definition Tag\",\"scope\":[\"punctuation.definition.tag.html\"],\"settings\":{\"foreground\":\"#6ae9f0\"}},{\"name\":\"HTML Doctype\",\"scope\":[\"meta.tag.sgml.doctype.html\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"\"}},{\"name\":\"JavaScript Classes\",\"scope\":[\"meta.class entity.name.type.class.js\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"JavaScript Method Declaration e.g. 
`constructor`\",\"scope\":[\"meta.method.declaration storage.type.js\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"JavaScript Terminator\",\"scope\":[\"terminator.js\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"JavaScript Meta Punctuation Definition\",\"scope\":[\"meta.js punctuation.definition.js\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"Entity Names in Code Documentations\",\"scope\":[\"entity.name.type.instance.jsdoc\",\"entity.name.type.instance.phpdoc\"],\"settings\":{\"foreground\":\"#7690a6\"}},{\"name\":\"Other Variables in Code Documentations\",\"scope\":[\"variable.other.jsdoc\",\"variable.other.phpdoc\"],\"settings\":{\"foreground\":\"#78ccf0\"}},{\"name\":\"JavaScript module imports and exports\",\"scope\":[\"variable.other.meta.import.js\",\"meta.import.js variable.other\",\"variable.other.meta.export.js\",\"meta.export.js variable.other\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"JavaScript Variable Parameter Function\",\"scope\":[\"variable.parameter.function.js\"],\"settings\":{\"foreground\":\"#7986e7\"}},{\"name\":\"JavaScript[React] Variable Other Object\",\"scope\":[\"variable.other.object.js\",\"variable.other.object.jsx\",\"variable.object.property.js\",\"variable.object.property.jsx\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"JavaScript Variables\",\"scope\":[\"variable.js\",\"variable.other.js\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"JavaScript Entity Name Type\",\"scope\":[\"entity.name.type.js\",\"entity.name.type.module.js\"],\"settings\":{\"foreground\":\"#ffcb8b\",\"fontStyle\":\"\"}},{\"name\":\"JavaScript Support Classes\",\"scope\":[\"support.class.js\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"JSON Property Names\",\"scope\":[\"support.type.property-name.json\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"JSON Support 
Constants\",\"scope\":[\"support.constant.json\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"JSON Property values (string)\",\"scope\":[\"meta.structure.dictionary.value.json string.quoted.double\"],\"settings\":{\"foreground\":\"#c789d6\"}},{\"name\":\"Strings in JSON values\",\"scope\":[\"string.quoted.double.json punctuation.definition.string.json\"],\"settings\":{\"foreground\":\"#80cbc4\"}},{\"name\":\"Specific JSON Property values like null\",\"scope\":[\"meta.structure.dictionary.json meta.structure.dictionary.value constant.language\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"JavaScript Other Variable\",\"scope\":[\"variable.other.object.js\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Ruby Variables\",\"scope\":[\"variable.other.ruby\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"Ruby Class\",\"scope\":[\"entity.name.type.class.ruby\"],\"settings\":{\"foreground\":\"#ecc48d\"}},{\"name\":\"Ruby Hashkeys\",\"scope\":[\"constant.language.symbol.hashkey.ruby\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"LESS Tag names\",\"scope\":[\"entity.name.tag.less\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"LESS Keyword Other Unit\",\"scope\":[\"keyword.other.unit.css\"],\"settings\":{\"foreground\":\"#ffeb95\"}},{\"name\":\"Attribute Name for LESS\",\"scope\":[\"meta.attribute-selector.less entity.other.attribute-name.attribute\"],\"settings\":{\"foreground\":\"#f78c6c\"}},{\"name\":\"Markdown Headings\",\"scope\":[\"markup.heading.markdown\",\"markup.heading.setext.1.markdown\",\"markup.heading.setext.2.markdown\"],\"settings\":{\"foreground\":\"#82b1ff\"}},{\"name\":\"Markdown Italics\",\"scope\":[\"markup.italic.markdown\"],\"settings\":{\"foreground\":\"#c792ea\",\"fontStyle\":\"italic\"}},{\"name\":\"Markdown Bold\",\"scope\":[\"markup.bold.markdown\"],\"settings\":{\"foreground\":\"#c5e478\",\"fontStyle\":\"bold\"}},{\"name\":\"Markdown Quote + 
others\",\"scope\":[\"markup.quote.markdown\"],\"settings\":{\"foreground\":\"#868cac\",\"fontStyle\":\"\"}},{\"name\":\"Markdown Raw Code + others\",\"scope\":[\"markup.inline.raw.markdown\"],\"settings\":{\"foreground\":\"#80cbc4\"}},{\"name\":\"Markdown Links\",\"scope\":[\"markup.underline.link.markdown\",\"markup.underline.link.image.markdown\"],\"settings\":{\"foreground\":\"#ff869a\",\"fontStyle\":\"underline\"}},{\"name\":\"Markdown Link Title and Description\",\"scope\":[\"string.other.link.title.markdown\",\"string.other.link.description.markdown\"],\"settings\":{\"foreground\":\"#d6deeb\",\"fontStyle\":\"underline\"}},{\"name\":\"Markdown Punctuation\",\"scope\":[\"punctuation.definition.string.markdown\",\"punctuation.definition.string.begin.markdown\",\"punctuation.definition.string.end.markdown\",\"meta.link.inline.markdown punctuation.definition.string\"],\"settings\":{\"foreground\":\"#82b1ff\"}},{\"name\":\"Markdown MetaData Punctuation\",\"scope\":[\"punctuation.definition.metadata.markdown\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"Markdown List Punctuation\",\"scope\":[\"beginning.punctuation.definition.list.markdown\"],\"settings\":{\"foreground\":\"#82b1ff\"}},{\"name\":\"Markdown Inline Raw String\",\"scope\":[\"markup.inline.raw.string.markdown\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"PHP Variables\",\"scope\":[\"variable.other.php\"],\"settings\":{\"foreground\":\"#bec5d4\"}},{\"name\":\"Support Classes in PHP\",\"scope\":[\"support.class.php\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"Punctuations in PHP function calls\",\"scope\":[\"meta.function-call.php punctuation\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"PHP Global Variables\",\"scope\":[\"variable.other.global.php\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Declaration Punctuation in PHP Global Variables\",\"scope\":[\"variable.other.global.php 
punctuation.definition.variable\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Language Constants in Python\",\"scope\":[\"constant.language.python\"],\"settings\":{\"foreground\":\"#ff5874\"}},{\"name\":\"Python Function Parameter and Arguments\",\"scope\":[\"variable.parameter.function.python\",\"meta.function-call.arguments.python\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Python Function Call\",\"scope\":[\"meta.function-call.python\",\"meta.function-call.generic.python\"],\"settings\":{\"foreground\":\"#b2ccd6\"}},{\"name\":\"Punctuations in Python\",\"scope\":[\"punctuation.python\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"Decorator Functions in Python\",\"scope\":[\"entity.name.function.decorator.python\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Python Language Variable\",\"scope\":[\"source.python variable.language.special\"],\"settings\":{\"foreground\":\"#8eace3\"}},{\"name\":\"Python import control keyword\",\"scope\":[\"keyword.control\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"SCSS Variable\",\"scope\":[\"variable.scss\",\"variable.sass\",\"variable.parameter.url.scss\",\"variable.parameter.url.sass\"],\"settings\":{\"foreground\":\"#c5e478\"}},{\"name\":\"Variables in SASS At-Rules\",\"scope\":[\"source.css.scss meta.at-rule variable\",\"source.css.sass meta.at-rule variable\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"Variables in SASS At-Rules\",\"scope\":[\"source.css.scss meta.at-rule variable\",\"source.css.sass meta.at-rule variable\"],\"settings\":{\"foreground\":\"#bec5d4\"}},{\"name\":\"Attribute Name for SASS\",\"scope\":[\"meta.attribute-selector.scss entity.other.attribute-name.attribute\",\"meta.attribute-selector.sass entity.other.attribute-name.attribute\"],\"settings\":{\"foreground\":\"#f78c6c\"}},{\"name\":\"Tag names in SASS\",\"scope\":[\"entity.name.tag.scss\",\"entity.name.tag.sass\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"SASS 
Keyword Other Unit\",\"scope\":[\"keyword.other.unit.scss\",\"keyword.other.unit.sass\"],\"settings\":{\"foreground\":\"#ffeb95\"}},{\"name\":\"TypeScript[React] Variables and Object Properties\",\"scope\":[\"variable.other.readwrite.alias.ts\",\"variable.other.readwrite.alias.tsx\",\"variable.other.readwrite.ts\",\"variable.other.readwrite.tsx\",\"variable.other.object.ts\",\"variable.other.object.tsx\",\"variable.object.property.ts\",\"variable.object.property.tsx\",\"variable.other.ts\",\"variable.other.tsx\",\"variable.tsx\",\"variable.ts\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"TypeScript[React] Entity Name Types\",\"scope\":[\"entity.name.type.ts\",\"entity.name.type.tsx\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"TypeScript[React] Node Classes\",\"scope\":[\"support.class.node.ts\",\"support.class.node.tsx\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"TypeScript[React] Entity Name Types as Parameters\",\"scope\":[\"meta.type.parameters.ts entity.name.type\",\"meta.type.parameters.tsx entity.name.type\"],\"settings\":{\"foreground\":\"#7690a6\"}},{\"name\":\"TypeScript[React] Import/Export Punctuations\",\"scope\":[\"meta.import.ts punctuation.definition.block\",\"meta.import.tsx punctuation.definition.block\",\"meta.export.ts punctuation.definition.block\",\"meta.export.tsx punctuation.definition.block\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"TypeScript[React] Punctuation Decorators\",\"scope\":[\"meta.decorator punctuation.decorator.ts\",\"meta.decorator punctuation.decorator.tsx\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"TypeScript[React] Punctuation Decorators\",\"scope\":[\"meta.tag.js meta.jsx.children.tsx\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"YAML Entity Name Tags\",\"scope\":[\"entity.name.tag.yaml\"],\"settings\":{\"foreground\":\"#7fdbca\"}},{\"name\":\"JavaScript Variable Other 
ReadWrite\",\"scope\":[\"variable.other.readwrite.js\",\"variable.parameter\"],\"settings\":{\"foreground\":\"#d7dbe0\"}},{\"name\":\"Support Class Component\",\"scope\":[\"support.class.component.js\",\"support.class.component.tsx\"],\"settings\":{\"foreground\":\"#f78c6c\",\"fontStyle\":\"\"}},{\"name\":\"Text nested in React tags\",\"scope\":[\"meta.jsx.children\",\"meta.jsx.children.js\",\"meta.jsx.children.tsx\"],\"settings\":{\"foreground\":\"#d6deeb\"}},{\"name\":\"TypeScript Classes\",\"scope\":[\"meta.class entity.name.type.class.tsx\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"TypeScript Entity Name Type\",\"scope\":[\"entity.name.type.tsx\",\"entity.name.type.module.tsx\"],\"settings\":{\"foreground\":\"#ffcb8b\"}},{\"name\":\"TypeScript Class Variable Keyword\",\"scope\":[\"meta.class.ts meta.var.expr.ts storage.type.ts\",\"meta.class.tsx meta.var.expr.tsx storage.type.tsx\"],\"settings\":{\"foreground\":\"#c792ea\"}},{\"name\":\"TypeScript Method Declaration e.g. 
`constructor`\",\"scope\":[\"meta.method.declaration storage.type.ts\",\"meta.method.declaration storage.type.tsx\"],\"settings\":{\"foreground\":\"#82aaff\"}},{\"name\":\"normalize font style of certain components\",\"scope\":[\"meta.property-list.css meta.property-value.css variable.other.less\",\"meta.property-list.scss variable.scss\",\"meta.property-list.sass variable.sass\",\"meta.brace\",\"keyword.operator.operator\",\"keyword.operator.or.regexp\",\"keyword.operator.expression.in\",\"keyword.operator.relational\",\"keyword.operator.assignment\",\"keyword.operator.comparison\",\"keyword.operator.type\",\"keyword.operator\",\"keyword\",\"punctuation.definintion.string\",\"punctuation\",\"variable.other.readwrite.js\",\"storage.type\",\"source.css\",\"string.quoted\"],\"settings\":{\"fontStyle\":\"\"}}],\"styleOverrides\":{}},{\"name\":\"Night Owl Light\",\"type\":\"light\",\"colors\":{\"focusBorder\":\"#93a1a1\",\"foreground\":\"#403f53\",\"disabledForeground\":\"#61616180\",\"descriptionForeground\":\"#403f53\",\"errorForeground\":\"#403f53\",\"icon.foreground\":\"#424242\",\"contrastActiveBorder\":null,\"contrastBorder\":null,\"textBlockQuote.background\":\"#7f7f7f1a\",\"textBlockQuote.border\":\"#007acc80\",\"textCodeBlock.background\":\"#dcdcdc66\",\"textLink.activeForeground\":\"#006ab1\",\"textLink.foreground\":\"#006ab1\",\"textPreformat.foreground\":\"#a31515\",\"textSeparator.foreground\":\"#0000002e\",\"editor.background\":\"#fbfbfb\",\"editor.foreground\":\"#403f53\",\"editorLineNumber.foreground\":\"#90a7b2\",\"editorLineNumber.activeForeground\":\"#403f53\",\"editorActiveLineNumber.foreground\":\"#0b216f\",\"editor.selectionBackground\":\"#e0e0e0\",\"editor.inactiveSelectionBackground\":\"#e0e0e080\",\"editor.selectionHighlightBackground\":\"#339cec33\",\"editorError.foreground\":\"#e64d49\",\"editorWarning.foreground\":\"#daaa01\",\"editorInfo.foreground\":\"#1a85ff\",\"editorHint.foreground\":\"#6c6c6c\",\"problemsErrorIcon.foreground\":\"#e64d49
\",\"problemsWarningIcon.foreground\":\"#daaa01\",\"problemsInfoIcon.foreground\":\"#1a85ff\",\"editor.findMatchBackground\":\"#93a1a16c\",\"editor.findMatchHighlightBackground\":\"#93a1a16c\",\"editor.findRangeHighlightBackground\":\"#7497a633\",\"editorLink.activeForeground\":\"#0000ff\",\"editorLightBulb.foreground\":\"#ddb100\",\"editorLightBulbAutoFix.foreground\":\"#007acc\",\"diffEditor.insertedTextBackground\":\"#9ccc2c40\",\"diffEditor.insertedTextBorder\":null,\"diffEditor.removedTextBackground\":\"#ff000033\",\"diffEditor.removedTextBorder\":null,\"diffEditor.insertedLineBackground\":\"#9bb95533\",\"diffEditor.removedLineBackground\":\"#ff000033\",\"editorStickyScroll.background\":\"#fbfbfb\",\"editorStickyScrollHover.background\":\"#f0f0f0\",\"editorInlayHint.background\":\"#2aa29899\",\"editorInlayHint.foreground\":\"#f0f0f0\",\"editorInlayHint.typeBackground\":\"#2aa29899\",\"editorInlayHint.typeForeground\":\"#f0f0f0\",\"editorInlayHint.parameterBackground\":\"#2aa29899\",\"editorInlayHint.parameterForeground\":\"#f0f0f0\",\"editorPane.background\":\"#fbfbfb\",\"editorGroup.emptyBackground\":null,\"editorGroup.focusedEmptyBorder\":null,\"editorGroupHeader.tabsBackground\":\"#f0f0f0\",\"editorGroupHeader.tabsBorder\":\"#f6f6f6\",\"editorGroupHeader.noTabsBackground\":\"#f0f0f0\",\"editorGroupHeader.border\":null,\"editorGroup.border\":\"#f0f0f0\",\"editorGroup.dropBackground\":\"#2677cb2d\",\"editorGroup.dropIntoPromptForeground\":\"#403f53\",\"editorGroup.dropIntoPromptBackground\":\"#f0f0f0\",\"editorGroup.dropIntoPromptBorder\":null,\"sideBySideEditor.horizontalBorder\":\"#f0f0f0\",\"sideBySideEditor.verticalBorder\":\"#f0f0f0\",\"scrollbar.shadow\":\"#cccccc\",\"scrollbarSlider.background\":\"#64646466\",\"scrollbarSlider.hoverBackground\":\"#646464b2\",\"scrollbarSlider.activeBackground\":\"#00000099\",\"panel.background\":\"#f0f0f0\",\"panel.border\":\"#d9d9d9\",\"panelTitle.activeBorder\":\"#424242\",\"panelTitle.activeForeground\":\"#424242\",\
"panelTitle.inactiveForeground\":\"#424242bf\",\"panelSectionHeader.background\":\"#80808051\",\"terminal.background\":\"#f6f6f6\",\"widget.shadow\":\"#d9d9d9\",\"editorWidget.background\":\"#f0f0f0\",\"editorWidget.foreground\":\"#403f53\",\"editorWidget.border\":\"#d9d9d9\",\"quickInput.background\":\"#f0f0f0\",\"quickInput.foreground\":\"#403f53\",\"quickInputTitle.background\":\"#0000000f\",\"pickerGroup.foreground\":\"#403f53\",\"pickerGroup.border\":\"#d9d9d9\",\"editor.hoverHighlightBackground\":\"#339cec33\",\"editorHoverWidget.background\":\"#f0f0f0\",\"editorHoverWidget.foreground\":\"#403f53\",\"editorHoverWidget.border\":\"#d9d9d9\",\"editorHoverWidget.statusBarBackground\":\"#e4e4e4\",\"titleBar.activeBackground\":\"#f0f0f0\",\"titleBar.activeForeground\":\"#333333\",\"titleBar.inactiveBackground\":\"#f0f0f099\",\"titleBar.inactiveForeground\":\"#33333399\",\"titleBar.border\":\"#f6f6f6\",\"toolbar.hoverBackground\":\"#b8b8b850\",\"toolbar.activeBackground\":\"#a6a6a650\",\"tab.activeBackground\":\"#f6f6f6\",\"tab.unfocusedActiveBackground\":\"#f6f6f6\",\"tab.inactiveBackground\":\"#f0f0f0\",\"tab.unfocusedInactiveBackground\":\"#f0f0f0\",\"tab.activeForeground\":\"#403f53\",\"tab.inactiveForeground\":\"#403f53\",\"tab.unfocusedActiveForeground\":\"#403f53b3\",\"tab.unfocusedInactiveForeground\":\"#403f5380\",\"tab.hoverBackground\":null,\"tab.unfocusedHoverBackground\":null,\"tab.hoverForeground\":null,\"tab.unfocusedHoverForeground\":null,\"tab.border\":\"#f0f0f0\",\"tab.lastPinnedBorder\":\"#a9a9a9\",\"tab.activeBorder\":null,\"tab.unfocusedActiveBorder\":null,\"tab.activeBorderTop\":null,\"tab.unfocusedActiveBorderTop\":null,\"tab.hoverBorder\":null,\"tab.unfocusedHoverBorder\":null,\"tab.activeModifiedBorder\":\"#2aa298\",\"tab.inactiveModifiedBorder\":\"#93a1a1\",\"tab.unfocusedActiveModifiedBorder\":\"#93a1a1\",\"tab.unfocusedInactiveModifiedBorder\":\"#93a1a1\",\"badge.background\":\"#2aa298\",\"badge.foreground\":\"#f0f0f0\",\"button.background
\":\"#2aa298\",\"button.foreground\":\"#f0f0f0\",\"button.border\":null,\"button.separator\":\"#f0f0f066\",\"button.hoverBackground\":\"#22827a\",\"button.secondaryBackground\":\"#5f6a79\",\"button.secondaryForeground\":\"#ffffff\",\"button.secondaryHoverBackground\":\"#4c5561\",\"dropdown.background\":\"#f0f0f0\",\"dropdown.foreground\":\"#403f53\",\"dropdown.border\":\"#d9d9d9\",\"list.activeSelectionBackground\":\"#d3e8f8\",\"list.activeSelectionForeground\":\"#403f53\",\"tree.indentGuidesStroke\":\"#a9a9a9\",\"input.background\":\"#f0f0f0\",\"input.foreground\":\"#403f53\",\"input.placeholderForeground\":\"#93a1a1\",\"inputOption.activeBorder\":\"#2aa298\",\"inputOption.hoverBackground\":\"#b8b8b850\",\"inputOption.activeBackground\":\"#93a1a133\",\"inputOption.activeForeground\":\"#000000\",\"inputValidation.infoBackground\":\"#f0f0f0\",\"inputValidation.infoBorder\":\"#d0d0d0\",\"inputValidation.warningBackground\":\"#daaa01\",\"inputValidation.warningBorder\":\"#e0af02\",\"inputValidation.errorBackground\":\"#f76e6e\",\"inputValidation.errorBorder\":\"#de3d3b\",\"keybindingLabel.background\":\"#dddddd66\",\"keybindingLabel.foreground\":\"#555555\",\"keybindingLabel.border\":\"#cccccc66\",\"keybindingLabel.bottomBorder\":\"#bbbbbb66\",\"menu.foreground\":\"#403f53\",\"menu.background\":\"#f0f0f0\",\"menu.selectionForeground\":\"#403f53\",\"menu.selectionBackground\":\"#d3e8f8\",\"menu.separatorBackground\":\"#d4d4d4\",\"editor.snippetTabstopHighlightBackground\":\"#0a326433\",\"editor.snippetFinalTabstopHighlightBorder\":\"#0a326480\",\"terminal.ansiBlack\":\"#403f53\",\"terminal.ansiRed\":\"#de3d3b\",\"terminal.ansiGreen\":\"#08916a\",\"terminal.ansiYellow\":\"#e0af02\",\"terminal.ansiBlue\":\"#288ed7\",\"terminal.ansiMagenta\":\"#d6438a\",\"terminal.ansiCyan\":\"#2aa298\",\"terminal.ansiWhite\":\"#f0f0f0\",\"terminal.ansiBrightBlack\":\"#403f53\",\"terminal.ansiBrightRed\":\"#de3d3b\",\"terminal.ansiBrightGreen\":\"#08916a\",\"terminal.ansiBrightYellow\":\"#
daaa01\",\"terminal.ansiBrightBlue\":\"#288ed7\",\"terminal.ansiBrightMagenta\":\"#d6438a\",\"terminal.ansiBrightCyan\":\"#2aa298\",\"terminal.ansiBrightWhite\":\"#f0f0f0\",\"selection.background\":\"#7a8181ad\",\"notifications.background\":\"#f0f0f0\",\"notifications.foreground\":\"#403f53\",\"notificationLink.foreground\":\"#994cc3\",\"notifications.border\":\"#cccccc\",\"notificationCenter.border\":\"#cccccc\",\"notificationToast.border\":\"#cccccc\",\"notificationCenterHeader.foreground\":\"#403f53\",\"notificationCenterHeader.background\":\"#f0f0f0\",\"input.border\":\"#d9d9d9\",\"progressBar.background\":\"#2aa298\",\"list.inactiveSelectionBackground\":\"#e0e7ea\",\"list.inactiveSelectionForeground\":\"#403f53\",\"list.focusBackground\":\"#d3e8f8\",\"list.hoverBackground\":\"#d3e8f8\",\"list.focusForeground\":\"#403f53\",\"list.hoverForeground\":\"#403f53\",\"list.highlightForeground\":\"#403f53\",\"list.errorForeground\":\"#e64d49\",\"list.warningForeground\":\"#daaa01\",\"activityBar.background\":\"#f0f0f0\",\"activityBar.foreground\":\"#403f53\",\"activityBar.dropBackground\":\"#d0d0d0\",\"activityBarBadge.background\":\"#403f53\",\"activityBarBadge.foreground\":\"#f0f0f0\",\"activityBar.border\":\"#f0f0f0\",\"sideBar.background\":\"#f0f0f0\",\"sideBar.foreground\":\"#403f53\",\"sideBarTitle.foreground\":\"#403f53\",\"sideBar.border\":\"#f0f0f0\",\"editorGroup.background\":\"#f6f6f6\",\"editorCursor.foreground\":\"#90a7b2\",\"editor.wordHighlightBackground\":\"#339cec33\",\"editor.wordHighlightStrongBackground\":\"#007dd659\",\"editor.lineHighlightBackground\":\"#f0f0f0\",\"editor.rangeHighlightBackground\":\"#7497a633\",\"editorWhitespace.foreground\":\"#d9d9d9\",\"editorIndentGuide.background\":\"#d9d9d9\",\"editorCodeLens.foreground\":\"#403f53\",\"editorBracketMatch.background\":\"#d3e8f8\",\"editorBracketMatch.border\":\"#2aa298\",\"editorError.border\":\"#fbfbfb\",\"editorWarning.border\":\"#daaa01\",\"editorGutter.addedBackground\":\"#49d0c5\",\"edit
orGutter.modifiedBackground\":\"#6fbef6\",\"editorGutter.deletedBackground\":\"#f76e6e\",\"editorRuler.foreground\":\"#d9d9d9\",\"editorOverviewRuler.errorForeground\":\"#e64d49\",\"editorOverviewRuler.warningForeground\":\"#daaa01\",\"editorSuggestWidget.background\":\"#f0f0f0\",\"editorSuggestWidget.foreground\":\"#403f53\",\"editorSuggestWidget.highlightForeground\":\"#403f53\",\"editorSuggestWidget.selectedBackground\":\"#d3e8f8\",\"editorSuggestWidget.border\":\"#d9d9d9\",\"debugExceptionWidget.background\":\"#f0f0f0\",\"debugExceptionWidget.border\":\"#d9d9d9\",\"editorMarkerNavigation.background\":\"#d0d0d0\",\"editorMarkerNavigationError.background\":\"#f76e6e\",\"editorMarkerNavigationWarning.background\":\"#daaa01\",\"debugToolBar.background\":\"#f0f0f0\",\"extensionButton.prominentBackground\":\"#2aa298\",\"extensionButton.prominentForeground\":\"#f0f0f0\",\"statusBar.background\":\"#f0f0f0\",\"statusBar.border\":\"#f0f0f0\",\"statusBar.debuggingBackground\":\"#f0f0f0\",\"statusBar.debuggingForeground\":\"#403f53\",\"statusBar.foreground\":\"#403f53\",\"statusBar.noFolderBackground\":\"#f0f0f0\",\"statusBar.noFolderForeground\":\"#403f53\",\"peekView.border\":\"#d9d9d9\",\"peekViewEditor.background\":\"#f6f6f6\",\"peekViewEditorGutter.background\":\"#f6f6f6\",\"peekViewEditor.matchHighlightBackground\":\"#49d0c5\",\"peekViewResult.background\":\"#f0f0f0\",\"peekViewResult.fileForeground\":\"#403f53\",\"peekViewResult.lineForeground\":\"#403f53\",\"peekViewResult.matchHighlightBackground\":\"#49d0c5\",\"peekViewResult.selectionBackground\":\"#e0e7ea\",\"peekViewResult.selectionForeground\":\"#403f53\",\"peekViewTitle.background\":\"#f0f0f0\",\"peekViewTitleLabel.foreground\":\"#403f53\",\"peekViewTitleDescription.foreground\":\"#403f53\",\"terminal.foreground\":\"#403f53\"},\"fg\":\"#403f53\",\"bg\":\"#fbfbfb\",\"semanticHighlighting\":false,\"settings\":[{\"name\":\"Changed\",\"scope\":[\"markup.changed\",\"meta.diff.header.git\",\"meta.diff.header.from-f
ile\",\"meta.diff.header.to-file\"],\"settings\":{\"foreground\":\"#576687\"}},{\"name\":\"Deleted\",\"scope\":[\"markup.deleted.diff\"],\"settings\":{\"foreground\":\"#b33e3cfe\"}},{\"name\":\"Inserted\",\"scope\":[\"markup.inserted.diff\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Global settings\",\"settings\":{\"background\":\"#011627\",\"foreground\":\"#403f53\"}},{\"name\":\"Comment\",\"scope\":[\"comment\"],\"settings\":{\"foreground\":\"#616671\"}},{\"name\":\"String\",\"scope\":[\"string\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"String Quoted\",\"scope\":[\"string.quoted\",\"variable.other.readwrite.js\"],\"settings\":{\"foreground\":\"#9b504e\"}},{\"name\":\"Support Constant Math\",\"scope\":[\"support.constant.math\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Number\",\"scope\":[\"constant.numeric\",\"constant.character.numeric\"],\"settings\":{\"foreground\":\"#aa0982\",\"fontStyle\":\"\"}},{\"name\":\"Built-in constant\",\"scope\":[\"constant.language\",\"punctuation.definition.constant\",\"variable.other.constant\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"User-defined constant\",\"scope\":[\"constant.character\",\"constant.other\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Constant Character Escape\",\"scope\":[\"constant.character.escape\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"RegExp String\",\"scope\":[\"string.regexp\",\"string.regexp keyword.other\"],\"settings\":{\"foreground\":\"#3a6a90\"}},{\"name\":\"Comma in functions\",\"scope\":[\"meta.function punctuation.separator.comma\"],\"settings\":{\"foreground\":\"#4f687d\"}},{\"name\":\"Variable\",\"scope\":[\"variable\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Keyword\",\"scope\":[\"punctuation.accessor\",\"keyword\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Storage\",\"scope\":[\"storage\",\"meta.var.expr\",\"meta.class meta.method.declaration meta.var.expr 
storage.type.js\",\"storage.type.property.js\",\"storage.type.property.ts\",\"storage.type.property.tsx\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Storage type\",\"scope\":[\"storage.type\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Storage type\",\"scope\":[\"storage.type.function.arrow.js\"],\"settings\":{\"fontStyle\":\"\"}},{\"name\":\"Class name\",\"scope\":[\"entity.name.class\",\"meta.class entity.name.type.class\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Inherited class\",\"scope\":[\"entity.other.inherited-class\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Function name\",\"scope\":[\"entity.name.function\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Meta Tag\",\"scope\":[\"punctuation.definition.tag\",\"meta.tag\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"HTML Tag names\",\"scope\":[\"entity.name.tag\",\"meta.tag.other.html\",\"meta.tag.other.js\",\"meta.tag.other.tsx\",\"entity.name.tag.tsx\",\"entity.name.tag.js\",\"entity.name.tag\",\"meta.tag.js\",\"meta.tag.tsx\",\"meta.tag.html\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Tag attribute\",\"scope\":[\"entity.other.attribute-name\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Entity Name Tag Custom\",\"scope\":[\"entity.name.tag.custom\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Library (function & constant)\",\"scope\":[\"support.function\",\"support.constant\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Support Constant Property Value meta\",\"scope\":[\"support.constant.meta.property-value\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Library class/type\",\"scope\":[\"support.type\",\"support.class\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Support Variable 
DOM\",\"scope\":[\"support.variable.dom\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Invalid\",\"scope\":[\"invalid\"],\"settings\":{\"foreground\":\"#bf2162\"}},{\"name\":\"Invalid deprecated\",\"scope\":[\"invalid.deprecated\"],\"settings\":{\"foreground\":\"#b73936\"}},{\"name\":\"Keyword Operator\",\"scope\":[\"keyword.operator\"],\"settings\":{\"foreground\":\"#097174\",\"fontStyle\":\"\"}},{\"name\":\"Keyword Operator Relational\",\"scope\":[\"keyword.operator.relational\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Operator Assignment\",\"scope\":[\"keyword.operator.assignment\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Operator Arithmetic\",\"scope\":[\"keyword.operator.arithmetic\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Operator Bitwise\",\"scope\":[\"keyword.operator.bitwise\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Operator Increment\",\"scope\":[\"keyword.operator.increment\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Operator Ternary\",\"scope\":[\"keyword.operator.ternary\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Double-Slashed Comment\",\"scope\":[\"comment.line.double-slash\"],\"settings\":{\"foreground\":\"#5e6578\"}},{\"name\":\"Object\",\"scope\":[\"object\"],\"settings\":{\"foreground\":\"#5a686d\"}},{\"name\":\"Null\",\"scope\":[\"constant.language.null\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"Meta Brace\",\"scope\":[\"meta.brace\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Meta Delimiter Period\",\"scope\":[\"meta.delimiter.period\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Punctuation Definition String\",\"scope\":[\"punctuation.definition.string\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Punctuation Definition String 
Markdown\",\"scope\":[\"punctuation.definition.string.begin.markdown\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"Boolean\",\"scope\":[\"constant.language.boolean\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"Object Comma\",\"scope\":[\"object.comma\"],\"settings\":{\"foreground\":\"#666666\"}},{\"name\":\"Variable Parameter Function\",\"scope\":[\"variable.parameter.function\"],\"settings\":{\"foreground\":\"#097174\",\"fontStyle\":\"\"}},{\"name\":\"Support Type Property Name & entity name tags\",\"scope\":[\"support.type.vendor.property-name\",\"support.constant.vendor.property-value\",\"support.type.property-name\",\"meta.property-list entity.name.tag\"],\"settings\":{\"foreground\":\"#097174\",\"fontStyle\":\"\"}},{\"name\":\"Entity Name tag reference in stylesheets\",\"scope\":[\"meta.property-list entity.name.tag.reference\"],\"settings\":{\"foreground\":\"#297073\"}},{\"name\":\"Constant Other Color RGB Value Punctuation Definition Constant\",\"scope\":[\"constant.other.color.rgb-value punctuation.definition.constant\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Constant Other Color\",\"scope\":[\"constant.other.color\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Keyword Other Unit\",\"scope\":[\"keyword.other.unit\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Meta Selector\",\"scope\":[\"meta.selector\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Entity Other Attribute Name Id\",\"scope\":[\"entity.other.attribute-name.id\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Meta Property Name\",\"scope\":[\"meta.property-name\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Doctypes\",\"scope\":[\"entity.name.tag.doctype\",\"meta.tag.sgml.doctype\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Punctuation Definition Parameters\",\"scope\":[\"punctuation.definition.parameters\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Keyword Control 
Operator\",\"scope\":[\"keyword.control.operator\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Keyword Operator Logical\",\"scope\":[\"keyword.operator.logical\"],\"settings\":{\"foreground\":\"#8d46b4\",\"fontStyle\":\"\"}},{\"name\":\"Variable Instances\",\"scope\":[\"variable.instance\",\"variable.other.instance\",\"variable.readwrite.instance\",\"variable.other.readwrite.instance\",\"variable.other.property\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Variable Property Other object property\",\"scope\":[\"variable.other.object.property\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Variable Property Other object\",\"scope\":[\"variable.other.object.js\"],\"settings\":{\"fontStyle\":\"\"}},{\"name\":\"Entity Name Function\",\"scope\":[\"entity.name.function\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Keyword Operator Comparison, imports, returns and Keyword Operator Ruby\",\"scope\":[\"keyword.operator.comparison\",\"keyword.control.flow.js\",\"keyword.control.flow.ts\",\"keyword.control.flow.tsx\",\"keyword.control.ruby\",\"keyword.control.module.ruby\",\"keyword.control.class.ruby\",\"keyword.control.def.ruby\",\"keyword.control.loop.js\",\"keyword.control.loop.ts\",\"keyword.control.import.js\",\"keyword.control.import.ts\",\"keyword.control.import.tsx\",\"keyword.control.from.js\",\"keyword.control.from.ts\",\"keyword.control.from.tsx\",\"keyword.operator.instanceof.js\",\"keyword.operator.expression.instanceof.ts\",\"keyword.operator.expression.instanceof.tsx\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Keyword Control Conditional\",\"scope\":[\"keyword.control.conditional.js\",\"keyword.control.conditional.ts\",\"keyword.control.switch.js\",\"keyword.control.switch.ts\"],\"settings\":{\"foreground\":\"#8d46b4\",\"fontStyle\":\"\"}},{\"name\":\"Support Constant, `new` keyword, Special Method Keyword, `debugger`, other 
keywords\",\"scope\":[\"support.constant\",\"keyword.other.special-method\",\"keyword.other.new\",\"keyword.other.debugger\",\"keyword.control\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Support Function\",\"scope\":[\"support.function\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Invalid Broken\",\"scope\":[\"invalid.broken\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Invalid Unimplemented\",\"scope\":[\"invalid.unimplemented\"],\"settings\":{\"foreground\":\"#497026\"}},{\"name\":\"Invalid Illegal\",\"scope\":[\"invalid.illegal\"],\"settings\":{\"foreground\":\"#9b504e\"}},{\"name\":\"Language Variable\",\"scope\":[\"variable.language\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Support Variable Property\",\"scope\":[\"support.variable.property\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Variable Function\",\"scope\":[\"variable.function\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Variable Interpolation\",\"scope\":[\"variable.interpolation\"],\"settings\":{\"foreground\":\"#aa444a\"}},{\"name\":\"Meta Function Call\",\"scope\":[\"meta.function-call\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Punctuation Section Embedded\",\"scope\":[\"punctuation.section.embedded\"],\"settings\":{\"foreground\":\"#b73936\"}},{\"name\":\"Punctuation Tweaks\",\"scope\":[\"punctuation.terminator.expression\",\"punctuation.definition.arguments\",\"punctuation.definition.array\",\"punctuation.section.array\",\"meta.array\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"More Punctuation Tweaks\",\"scope\":[\"punctuation.definition.list.begin\",\"punctuation.definition.list.end\",\"punctuation.separator.arguments\",\"punctuation.definition.list\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Template Strings\",\"scope\":[\"string.template meta.template.expression\"],\"settings\":{\"foreground\":\"#b73936\"}},{\"name\":\"Backtics(``) in Template 
Strings\",\"scope\":[\"string.template punctuation.definition.string\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Italics\",\"scope\":[\"italic\"],\"settings\":{\"foreground\":\"#8d46b4\",\"fontStyle\":\"italic\"}},{\"name\":\"Bold\",\"scope\":[\"bold\"],\"settings\":{\"foreground\":\"#3c63b3\",\"fontStyle\":\"bold\"}},{\"name\":\"Quote\",\"scope\":[\"quote\"],\"settings\":{\"foreground\":\"#5e6487\"}},{\"name\":\"Raw Code\",\"scope\":[\"raw\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"CoffeScript Variable Assignment\",\"scope\":[\"variable.assignment.coffee\"],\"settings\":{\"foreground\":\"#197176\"}},{\"name\":\"CoffeScript Parameter Function\",\"scope\":[\"variable.parameter.function.coffee\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"CoffeeScript Assignments\",\"scope\":[\"variable.assignment.coffee\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"C# Readwrite Variables\",\"scope\":[\"variable.other.readwrite.cs\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"C# Classes & Storage types\",\"scope\":[\"entity.name.type.class.cs\",\"storage.type.cs\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"C# Namespaces\",\"scope\":[\"entity.name.type.namespace.cs\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Tag names in Stylesheets\",\"scope\":[\"entity.name.tag.css\",\"entity.name.tag.less\",\"entity.name.tag.custom.css\",\"support.constant.property-value.css\"],\"settings\":{\"foreground\":\"#9b504e\",\"fontStyle\":\"\"}},{\"name\":\"Wildcard(*) selector in Stylesheets\",\"scope\":[\"entity.name.tag.wildcard.css\",\"entity.name.tag.wildcard.less\",\"entity.name.tag.wildcard.scss\",\"entity.name.tag.wildcard.sass\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"CSS Keyword Other Unit\",\"scope\":[\"keyword.other.unit.css\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Attribute Name for CSS\",\"scope\":[\"meta.attribute-selector.css 
entity.other.attribute-name.attribute\",\"variable.other.readwrite.js\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Elixir Classes\",\"scope\":[\"source.elixir support.type.elixir\",\"source.elixir meta.module.elixir entity.name.class.elixir\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Elixir Functions\",\"scope\":[\"source.elixir entity.name.function\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Elixir Constants\",\"scope\":[\"source.elixir constant.other.symbol.elixir\",\"source.elixir constant.other.keywords.elixir\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Elixir String Punctuations\",\"scope\":[\"source.elixir punctuation.definition.string\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Elixir\",\"scope\":[\"source.elixir variable.other.readwrite.module.elixir\",\"source.elixir variable.other.readwrite.module.elixir punctuation.definition.variable.elixir\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Elixir Binary Punctuations\",\"scope\":[\"source.elixir .punctuation.binary.elixir\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Closure Constant Keyword\",\"scope\":[\"constant.keyword.clojure\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Go Function Calls\",\"scope\":[\"source.go meta.function-call.go\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Go Keywords\",\"scope\":[\"source.go keyword.package.go\",\"source.go keyword.import.go\",\"source.go keyword.function.go\",\"source.go keyword.type.go\",\"source.go keyword.struct.go\",\"source.go keyword.interface.go\",\"source.go keyword.const.go\",\"source.go keyword.var.go\",\"source.go keyword.map.go\",\"source.go keyword.channel.go\",\"source.go keyword.control.go\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"Go Constants e.g. 
nil, string format (%s, %d, etc.)\",\"scope\":[\"source.go constant.language.go\",\"source.go constant.other.placeholder.go\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"C++ Functions\",\"scope\":[\"entity.name.function.preprocessor.cpp\",\"entity.scope.name.cpp\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"C++ Meta Namespace\",\"scope\":[\"meta.namespace-block.cpp\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"C++ Language Primitive Storage\",\"scope\":[\"storage.type.language.primitive.cpp\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"C++ Preprocessor Macro\",\"scope\":[\"meta.preprocessor.macro.cpp\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"C++ Variable Parameter\",\"scope\":[\"variable.parameter\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Powershell Variables\",\"scope\":[\"variable.other.readwrite.powershell\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Powershell Function\",\"scope\":[\"support.function.powershell\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"ID Attribute Name in HTML\",\"scope\":[\"entity.other.attribute-name.id.html\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"HTML Punctuation Definition Tag\",\"scope\":[\"punctuation.definition.tag.html\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"HTML Doctype\",\"scope\":[\"meta.tag.sgml.doctype.html\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"JavaScript Classes\",\"scope\":[\"meta.class entity.name.type.class.js\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"JavaScript Method Declaration e.g. 
`constructor`\",\"scope\":[\"meta.method.declaration storage.type.js\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"JavaScript Terminator\",\"scope\":[\"terminator.js\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"JavaScript Meta Punctuation Definition\",\"scope\":[\"meta.js punctuation.definition.js\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Entity Names in Code Documentations\",\"scope\":[\"entity.name.type.instance.jsdoc\",\"entity.name.type.instance.phpdoc\"],\"settings\":{\"foreground\":\"#4f687d\"}},{\"name\":\"Other Variables in Code Documentations\",\"scope\":[\"variable.other.jsdoc\",\"variable.other.phpdoc\"],\"settings\":{\"foreground\":\"#406c80\"}},{\"name\":\"JavaScript module imports and exports\",\"scope\":[\"variable.other.meta.import.js\",\"meta.import.js variable.other\",\"variable.other.meta.export.js\",\"meta.export.js variable.other\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"JavaScript Variable Parameter Function\",\"scope\":[\"variable.parameter.function.js\"],\"settings\":{\"foreground\":\"#5760a6\"}},{\"name\":\"JavaScript[React] Variable Other Object\",\"scope\":[\"variable.other.object.js\",\"variable.other.object.jsx\",\"variable.object.property.js\",\"variable.object.property.jsx\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"JavaScript Variables\",\"scope\":[\"variable.js\",\"variable.other.js\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"JavaScript Entity Name Type\",\"scope\":[\"entity.name.type.js\",\"entity.name.type.module.js\"],\"settings\":{\"foreground\":\"#111111\",\"fontStyle\":\"\"}},{\"name\":\"JavaScript Support Classes\",\"scope\":[\"support.class.js\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"JSON Property Names\",\"scope\":[\"support.type.property-name.json\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"JSON Support 
Constants\",\"scope\":[\"support.constant.json\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"JSON Property values (string)\",\"scope\":[\"meta.structure.dictionary.value.json string.quoted.double\"],\"settings\":{\"foreground\":\"#7f5889\"}},{\"name\":\"Strings in JSON values\",\"scope\":[\"string.quoted.double.json punctuation.definition.string.json\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Specific JSON Property values like null\",\"scope\":[\"meta.structure.dictionary.json meta.structure.dictionary.value constant.language\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"JavaScript Other Variable\",\"scope\":[\"variable.other.object.js\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Ruby Variables\",\"scope\":[\"variable.other.ruby\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Ruby Class\",\"scope\":[\"entity.name.type.class.ruby\"],\"settings\":{\"foreground\":\"#9b504e\"}},{\"name\":\"Ruby Hashkeys\",\"scope\":[\"constant.language.symbol.hashkey.ruby\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Ruby Symbols\",\"scope\":[\"constant.language.symbol.ruby\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"LESS Tag names\",\"scope\":[\"entity.name.tag.less\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"LESS Keyword Other Unit\",\"scope\":[\"keyword.other.unit.css\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Attribute Name for LESS\",\"scope\":[\"meta.attribute-selector.less entity.other.attribute-name.attribute\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Markdown Headings\",\"scope\":[\"markup.heading.markdown\",\"markup.heading.setext.1.markdown\",\"markup.heading.setext.2.markdown\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Markdown Italics\",\"scope\":[\"markup.italic.markdown\"],\"settings\":{\"foreground\":\"#8d46b4\",\"fontStyle\":\"italic\"}},{\"name\":\"Markdown 
Bold\",\"scope\":[\"markup.bold.markdown\"],\"settings\":{\"foreground\":\"#3c63b3\",\"fontStyle\":\"bold\"}},{\"name\":\"Markdown Quote + others\",\"scope\":[\"markup.quote.markdown\"],\"settings\":{\"foreground\":\"#5e6487\"}},{\"name\":\"Markdown Raw Code + others\",\"scope\":[\"markup.inline.raw.markdown\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Markdown Links\",\"scope\":[\"markup.underline.link.markdown\",\"markup.underline.link.image.markdown\"],\"settings\":{\"foreground\":\"#97505b\",\"fontStyle\":\"underline\"}},{\"name\":\"Markdown Link Title and Description\",\"scope\":[\"string.other.link.title.markdown\",\"string.other.link.description.markdown\"],\"settings\":{\"foreground\":\"#403f53\",\"fontStyle\":\"underline\"}},{\"name\":\"Markdown Punctuation\",\"scope\":[\"punctuation.definition.string.markdown\",\"punctuation.definition.string.begin.markdown\",\"punctuation.definition.string.end.markdown\",\"meta.link.inline.markdown punctuation.definition.string\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Markdown MetaData Punctuation\",\"scope\":[\"punctuation.definition.metadata.markdown\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Markdown List Punctuation\",\"scope\":[\"beginning.punctuation.definition.list.markdown\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Markdown Inline Raw String\",\"scope\":[\"markup.inline.raw.string.markdown\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"PHP Variables\",\"scope\":[\"variable.other.php\",\"variable.other.property.php\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Support Classes in PHP\",\"scope\":[\"support.class.php\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Punctuations in PHP function calls\",\"scope\":[\"meta.function-call.php punctuation\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"PHP Global 
Variables\",\"scope\":[\"variable.other.global.php\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Declaration Punctuation in PHP Global Variables\",\"scope\":[\"variable.other.global.php punctuation.definition.variable\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Language Constants in Python\",\"scope\":[\"constant.language.python\"],\"settings\":{\"foreground\":\"#a54a4a\"}},{\"name\":\"Python Function Parameter and Arguments\",\"scope\":[\"variable.parameter.function.python\",\"meta.function-call.arguments.python\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Python Function Call\",\"scope\":[\"meta.function-call.python\",\"meta.function-call.generic.python\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"Punctuations in Python\",\"scope\":[\"punctuation.python\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Decorator Functions in Python\",\"scope\":[\"entity.name.function.decorator.python\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Python Language Variable\",\"scope\":[\"source.python variable.language.special\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Python import control keyword\",\"scope\":[\"keyword.control\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"SCSS Variable\",\"scope\":[\"variable.scss\",\"variable.sass\",\"variable.parameter.url.scss\",\"variable.parameter.url.sass\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Variables in SASS At-Rules\",\"scope\":[\"source.css.scss meta.at-rule variable\",\"source.css.sass meta.at-rule variable\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"Variables in SASS At-Rules\",\"scope\":[\"source.css.scss meta.at-rule variable\",\"source.css.sass meta.at-rule variable\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"Attribute Name for SASS\",\"scope\":[\"meta.attribute-selector.scss entity.other.attribute-name.attribute\",\"meta.attribute-selector.sass 
entity.other.attribute-name.attribute\"],\"settings\":{\"foreground\":\"#aa0982\"}},{\"name\":\"Tag names in SASS\",\"scope\":[\"entity.name.tag.scss\",\"entity.name.tag.sass\"],\"settings\":{\"foreground\":\"#097174\"}},{\"name\":\"SASS Keyword Other Unit\",\"scope\":[\"keyword.other.unit.scss\",\"keyword.other.unit.sass\"],\"settings\":{\"foreground\":\"#8d46b4\"}},{\"name\":\"TypeScript[React] Variables and Object Properties\",\"scope\":[\"variable.other.readwrite.alias.ts\",\"variable.other.readwrite.alias.tsx\",\"variable.other.readwrite.ts\",\"variable.other.readwrite.tsx\",\"variable.other.object.ts\",\"variable.other.object.tsx\",\"variable.object.property.ts\",\"variable.object.property.tsx\",\"variable.other.ts\",\"variable.other.tsx\",\"variable.tsx\",\"variable.ts\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"TypeScript[React] Entity Name Types\",\"scope\":[\"entity.name.type.ts\",\"entity.name.type.tsx\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"TypeScript[React] Node Classes\",\"scope\":[\"support.class.node.ts\",\"support.class.node.tsx\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"TypeScript[React] Entity Name Types as Parameters\",\"scope\":[\"meta.type.parameters.ts entity.name.type\",\"meta.type.parameters.tsx entity.name.type\"],\"settings\":{\"foreground\":\"#4f687d\"}},{\"name\":\"TypeScript[React] Import/Export Punctuations\",\"scope\":[\"meta.import.ts punctuation.definition.block\",\"meta.import.tsx punctuation.definition.block\",\"meta.export.ts punctuation.definition.block\",\"meta.export.tsx punctuation.definition.block\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"TypeScript[React] Punctuation Decorators\",\"scope\":[\"meta.decorator punctuation.decorator.ts\",\"meta.decorator punctuation.decorator.tsx\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"TypeScript[React] Punctuation Decorators\",\"scope\":[\"meta.tag.js 
meta.jsx.children.tsx\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"YAML Entity Name Tags\",\"scope\":[\"entity.name.tag.yaml\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"JavaScript Variable Other ReadWrite\",\"scope\":[\"variable.other.readwrite.js\",\"variable.parameter\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"Support Class Component\",\"scope\":[\"support.class.component.js\",\"support.class.component.tsx\"],\"settings\":{\"foreground\":\"#aa0982\",\"fontStyle\":\"\"}},{\"name\":\"Text nested in React tags\",\"scope\":[\"meta.jsx.children\",\"meta.jsx.children.js\",\"meta.jsx.children.tsx\"],\"settings\":{\"foreground\":\"#403f53\"}},{\"name\":\"TypeScript Classes\",\"scope\":[\"meta.class entity.name.type.class.tsx\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"TypeScript Entity Name Type\",\"scope\":[\"entity.name.type.tsx\",\"entity.name.type.module.tsx\"],\"settings\":{\"foreground\":\"#111111\"}},{\"name\":\"TypeScript Class Variable Keyword\",\"scope\":[\"meta.class.ts meta.var.expr.ts storage.type.ts\",\"meta.class.tsx meta.var.expr.tsx storage.type.tsx\"],\"settings\":{\"foreground\":\"#79598f\"}},{\"name\":\"TypeScript Method Declaration e.g. 
`constructor`\",\"scope\":[\"meta.method.declaration storage.type.ts\",\"meta.method.declaration storage.type.tsx\"],\"settings\":{\"foreground\":\"#3c63b3\"}},{\"name\":\"normalize font style of certain components\",\"scope\":[\"meta.property-list.css meta.property-value.css variable.other.less\",\"meta.property-list.scss variable.scss\",\"meta.property-list.sass variable.sass\",\"meta.brace\",\"keyword.operator.operator\",\"keyword.operator.or.regexp\",\"keyword.operator.expression.in\",\"keyword.operator.relational\",\"keyword.operator.assignment\",\"keyword.operator.comparison\",\"keyword.operator.type\",\"keyword.operator\",\"keyword\",\"punctuation.definintion.string\",\"punctuation\",\"variable.other.readwrite.js\",\"storage.type\",\"source.css\",\"string.quoted\"],\"settings\":{\"fontStyle\":\"\"}}],\"styleOverrides\":{}}],\"defaultLocale\":\"en\",\"cascadeLayer\":\"starlight.components\",\"styleOverrides\":{\"borderRadius\":\"0px\",\"borderWidth\":\"1px\",\"codePaddingBlock\":\"0.75rem\",\"codePaddingInline\":\"1rem\",\"codeFontFamily\":\"var(--__sl-font-mono)\",\"codeFontSize\":\"var(--sl-text-code)\",\"codeLineHeight\":\"var(--sl-line-height)\",\"uiFontFamily\":\"var(--__sl-font)\",\"textMarkers\":{\"lineDiffIndicatorMarginLeft\":\"0.25rem\",\"defaultChroma\":\"45\",\"backgroundOpacity\":\"60%\"}},\"plugins\":[{\"name\":\"Starlight 
Plugin\",\"hooks\":{}},{\"name\":\"astro-expressive-code\",\"hooks\":{}}]}]],\"remarkRehype\":{},\"gfm\":true,\"smartypants\":true},\"security\":{\"checkOrigin\":true},\"env\":{\"schema\":{},\"validateSecrets\":false},\"experimental\":{\"clientPrerender\":false,\"contentIntellisense\":false,\"headingIdCompat\":false,\"preserveScriptOrder\":false,\"liveContentCollections\":false,\"csp\":false,\"staticImportMetaEnv\":false,\"chromeDevtoolsWorkspace\":false,\"failOnPrerenderConflict\":false},\"legacy\":{\"collections\":false},\"prefetch\":{\"prefetchAll\":true},\"i18n\":{\"defaultLocale\":\"en\",\"locales\":[\"en\"],\"routing\":{\"prefixDefaultLocale\":false,\"redirectToDefaultLocale\":false,\"fallbackType\":\"redirect\"}}}","docs",["Map",11,12,51,52,134,135,217,218,352,353,447,448,466,467,505,506,545,546,585,586,630,631,664,665,694,695,732,733,764,765,804,805,865,866,900,901],"index",{"id":11,"data":13,"body":23,"filePath":24,"digest":25,"rendered":26},{"title":14,"editUrl":15,"head":16,"template":17,"sidebar":18,"pagefind":15,"draft":21},"Fairspec Python",true,[],"doc",{"order":19,"label":20,"hidden":21,"attrs":22},1,"Getting Started",false,{},"This guide will help you get started with Fairspec Python. If you are new to the core framework's technologies, please take a look at the [Fairspec standard](https://fairspec.org/) and [Polars DataFrames](https://pola.rs/) documentation.\n\n## Runtimes\n\n> [!TIP]\n> - It is possible to use Fairspec Python in [Jupyter Notebooks](/python/jupyter)!\n\nFairspec Python requires:\n\n- **Python 3.12+**\n\n## Installation\n\nThe framework can be installed as one package:\n\n```bash\npip install fairspec\n```\n\nYou can cherry-pick from individual packages:\n\n```bash\npip install fairspec-metadata fairspec-table\n```\n\n## Type Hints\n\nFairspec Python is built with type safety in mind. It uses Python type hints to provide type definitions for all packages and to enforce type safety throughout the framework. 
It's highly recommended to use a type-aware editor such as VS Code with Pylance or PyCharm to work with the project.\n\n## Examples\n\nLoading a Dataset from Zenodo merging system Zenodo metadata into a user dataset and validating its metadata:\n\n```python\nfrom fairspec import load_dataset\n\ndataset = load_dataset(\"https://zenodo.org/records/10053903\")\n\nprint(dataset)\n# {\n# \"id\": \"https://doi.org/10.5281/zenodo.10053903\",\n# ...\n# }\n```\n\nValidating an in-memory dataset descriptor:\n\n```python\nfrom fairspec import validate_dataset\n\nreport = validate_dataset({\"resources\": \"bad\"})\n\nprint(report.valid)\n# False\nprint(report.errors)\n# [\n# {\n# \"type\": \"metadata\",\n# \"message\": \"must have type array\",\n# \"jsonPointer\": \"/resources\",\n# }\n# ]\n```\n\nLoading a dataset from a remote descriptor and saving it locally as a zip archive, and then using it as a local dataset:\n\n```python\nfrom fairspec import (\n load_dataset,\n load_dataset_from_zip,\n save_dataset_to_zip,\n get_temp_file_path,\n)\n\narchive_path = get_temp_file_path()\nsource_dataset = load_dataset(\n \"https://raw.githubusercontent.com/roll/currency-codes/refs/heads/master/datapackage.json\",\n)\n\nsave_dataset_to_zip(source_dataset, archive_path=archive_path)\ntarget_dataset = load_dataset_from_zip(archive_path)\nprint(target_dataset)\n```\n\nReading a CSV table:\n\n```python\nfrom fairspec import load_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\ntable = load_table(Resource(data=\"data.csv\"))\n\n# Load with custom format\ntable = load_table(Resource(\n data=\"data.csv\",\n fileDialect=CsvFileDialect(\n delimiter=\";\",\n headerRows=[1],\n ),\n))\n```\n\n## Reference\n\nNote that `fairspec` and `fairspec-library` packages re-export most of the functionality.","content/docs/index.md","77c2dc11739dcb9f",{"html":27,"metadata":28},"\u003Cp>This guide will help you get started with Fairspec Python. 
If you are new to the core framework’s technologies, please take a look at the \u003Ca href=\"https://fairspec.org/\">Fairspec standard\u003C/a> and \u003Ca href=\"https://pola.rs/\">Polars DataFrames\u003C/a> documentation.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"runtimes\">Runtimes\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#runtimes\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Runtimes”\u003C/span>\u003C/a>\u003C/div>\n\u003Caside aria-label=\"Tip\" class=\"starlight-aside starlight-aside--tip\">\u003Cp class=\"starlight-aside__title\" aria-hidden=\"true\">\u003Csvg viewBox=\"0 0 24 24\" width=\"16\" height=\"16\" fill=\"currentColor\" class=\"starlight-aside__icon\">\u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M1.43909 8.85483L1.44039 8.85354L4.96668 5.33815C5.30653 4.99386 5.7685 4.79662 6.2524 4.78972L6.26553 4.78963L12.9014 4.78962L13.8479 3.84308C16.9187 0.772319 20.0546 0.770617 21.4678 0.975145C21.8617 1.02914 22.2271 1.21053 22.5083 1.4917C22.7894 1.77284 22.9708 2.13821 23.0248 2.53199C23.2294 3.94517 23.2278 7.08119 20.1569 10.1521L19.2107 11.0983V17.7338L19.2106 17.7469C19.2037 18.2308 19.0067 18.6933 18.6624 19.0331L15.1456 22.5608C14.9095 22.7966 14.6137 22.964 14.29 23.0449C13.9663 23.1259 13.6267 23.1174 13.3074 23.0204C12.9881 22.9235 12.7011 22.7417 12.4771 
22.4944C12.2533 22.2473 12.1006 21.9441 12.0355 21.6171L11.1783 17.3417L6.65869 12.822L4.34847 12.3589L2.38351 11.965C2.05664 11.8998 1.75272 11.747 1.50564 11.5232C1.25835 11.2992 1.07653 11.0122 0.979561 10.6929C0.882595 10.3736 0.874125 10.034 0.955057 9.7103C1.03599 9.38659 1.20328 9.09092 1.43909 8.85483ZM6.8186 10.8724L2.94619 10.096L6.32006 6.73268H10.9583L6.8186 10.8724ZM15.2219 5.21703C17.681 2.75787 20.0783 2.75376 21.1124 2.8876C21.2462 3.92172 21.2421 6.31895 18.783 8.77812L12.0728 15.4883L8.51172 11.9272L15.2219 5.21703ZM13.9042 21.0538L13.1279 17.1811L17.2676 13.0414V17.68L13.9042 21.0538Z\">\u003C/path>\u003Cpath d=\"M9.31827 18.3446C9.45046 17.8529 9.17864 17.3369 8.68945 17.1724C8.56178 17.1294 8.43145 17.1145 8.30512 17.1243C8.10513 17.1398 7.91519 17.2172 7.76181 17.3434C7.62613 17.455 7.51905 17.6048 7.45893 17.7835C6.97634 19.2186 5.77062 19.9878 4.52406 20.4029C4.08525 20.549 3.6605 20.644 3.29471 20.7053C3.35607 20.3395 3.45098 19.9148 3.59711 19.476C4.01221 18.2294 4.78141 17.0237 6.21648 16.5411C6.39528 16.481 6.54504 16.3739 6.65665 16.2382C6.85126 16.0016 6.92988 15.678 6.84417 15.3647C6.83922 15.3466 6.83373 15.3286 6.82767 15.3106C6.74106 15.053 6.55701 14.8557 6.33037 14.7459C6.10949 14.6389 5.84816 14.615 5.59715 14.6994C5.47743 14.7397 5.36103 14.7831 5.24786 14.8294C3.22626 15.6569 2.2347 17.4173 1.75357 18.8621C1.49662 19.6337 1.36993 20.3554 1.30679 20.8818C1.27505 21.1464 1.25893 21.3654 1.25072 21.5213C1.24662 21.5993 1.24448 21.6618 1.24337 21.7066L1.243 21.7226L1.24235 21.7605L1.2422 21.7771L1.24217 21.7827L1.24217 21.7856C1.24217 22.3221 1.67703 22.7579 2.2137 22.7579L2.2155 22.7579L2.22337 22.7578L2.23956 22.7577C2.25293 22.7575 2.27096 22.7572 2.29338 22.7567C2.33821 22.7555 2.40073 22.7534 2.47876 22.7493C2.63466 22.7411 2.85361 22.725 3.11822 22.6932C3.64462 22.6301 4.36636 22.5034 5.13797 22.2464C6.58274 21.7653 8.3431 20.7738 9.17063 18.7522C9.21696 18.639 9.26037 18.5226 9.30064 18.4029C9.30716 18.3835 9.31304 18.364 
9.31827 18.3446Z\">\u003C/path>\u003C/svg>Tip\u003C/p>\u003Cdiv class=\"starlight-aside__content\">\u003Cp>\u003C/p>\u003Cul>\n\u003Cli>It is possible to use Fairspec Python in \u003Ca href=\"/python/jupyter\">Jupyter Notebooks\u003C/a>!\u003C/li>\n\u003C/ul>\u003C/div>\u003C/aside>\n\u003Cp>Fairspec Python requires:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Cstrong>Python 3.12+\u003C/strong>\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The framework can be installed as one package:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>You can cherry-pick from individual packages:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec-metadata\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec-table\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec-metadata fairspec-table\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"type-hints\">Type Hints\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#type-hints\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 
0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Type Hints”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Fairspec Python is built with type safety in mind. It uses Python type hints to provide type definitions for all packages and to enforce type safety throughout the framework. It’s highly recommended to use a type-aware editor such as VS Code with Pylance or PyCharm to work with the project.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"examples\">Examples\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#examples\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Examples”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Loading a Dataset from Zenodo merging system Zenodo metadata into a user dataset and validating its metadata:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">dataset \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://zenodo.org/records/10053903\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># \"id\": \"https://doi.org/10.5281/zenodo.10053903\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># ...\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># }\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_datasetdataset = load_dataset("https://zenodo.org/records/10053903")print(dataset)# {# "id": "https://doi.org/10.5281/zenodo.10053903",# ...# }\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Validating an in-memory dataset descriptor:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> validate_dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">report \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">validate_dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">resources\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">bad\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">report.valid\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># False\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">report.errors\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># \"type\": \"metadata\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># \"message\": \"must have type array\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># \"jsonPointer\": \"/resources\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 
]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import validate_datasetreport = validate_dataset({"resources": "bad"})print(report.valid)# Falseprint(report.errors)# [# {# "type": "metadata",# "message": "must have type array",# "jsonPointer": "/resources",# }# ]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Loading a dataset from a remote descriptor and saving it locally as a zip archive, and then using it as a local dataset:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> (\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">load_dataset,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">load_dataset_from_zip,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">save_dataset_to_zip,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">get_temp_file_path,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">archive_path \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">get_temp_file_path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">source_dataset \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://raw.githubusercontent.com/roll/currency-codes/refs/heads/master/datapackage.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_dataset_to_zip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">source_dataset\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">archive_path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">archive_path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">target_dataset \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_dataset_from_zip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">archive_path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">target_dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import ( load_dataset, load_dataset_from_zip, save_dataset_to_zip, get_temp_file_path,)archive_path = get_temp_file_path()source_dataset = load_dataset( "https://raw.githubusercontent.com/roll/currency-codes/refs/heads/master/datapackage.json",)save_dataset_to_zip(source_dataset, archive_path=archive_path)target_dataset = load_dataset_from_zip(archive_path)print(target_dataset)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Reading a CSV table:\u003C/p>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load 
with custom format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">;\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_table, Resourcefrom fairspec_metadata import CsvFileDialecttable = load_table(Resource(data="data.csv"))# Load with custom formattable = load_table(Resource( data="data.csv", fileDialect=CsvFileDialect( delimiter=";", headerRows=[1], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"reference\">Reference\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#reference\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Reference”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Note that \u003Ccode dir=\"auto\">fairspec\u003C/code> and \u003Ccode dir=\"auto\">fairspec-library\u003C/code> packages re-export most of the functionality.\u003C/p>",{"headings":29,"localImagePaths":46,"remoteImagePaths":47,"frontmatter":48,"imagePaths":50},[30,34,37,40,43],{"depth":31,"slug":32,"text":33},2,"runtimes","Runtimes",{"depth":31,"slug":35,"text":36},"installation","Installation",{"depth":31,"slug":38,"text":39},"type-hints","Type Hints",{"depth":31,"slug":41,"text":42},"examples","Examples",{"depth":31,"slug":44,"text":45},"reference","Reference",[],[],{"title":14,"sidebar":49},{"order":19,"label":20},[],"terminal/file",{"id":51,"data":53,"body":60,"filePath":61,"digest":62,"rendered":63},{"title":54,"editUrl":15,"head":55,"template":17,"sidebar":56,"pagefind":15,"draft":21},"Working with Files in Terminal",[],{"order":57,"label":58,"hidden":21,"attrs":59},4,"File",{},"File operations for copying, describing, validating, and analyzing local or remote files.\n\n## Available Commands\n\nThe `fairspec file` command provides utilities for working with files:\n\n- `copy` - Copy local or remote files\n- `describe` - Get file statistics and metadata\n- `validate` - Validate file integrity\n- `infer-dialect` - Infer file dialect\n\n## Copy Files\n\nCopy files from local or remote sources to a local destination:\n\n```bash\n# Copy a local file\nfairspec file copy data.csv --to-path output.csv\n\n# Copy a remote file\nfairspec file copy https://example.com/data.csv --to-path local-data.csv\n\n# Copy from a dataset resource\nfairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csv\n```\n\n### Options\n\n- `--to-path \u003Cpath>` (required) - Local output path\n- `--from-dataset \u003Cpath>` - Load file from dataset descriptor\n- `--from-resource \u003Cname>` - Specify resource name from dataset\n- `--silent` - Suppress output messages\n- 
`--debug` - Show debug information\n- `--json` - Output as JSON\n\n## Describe Files\n\nGet detailed information about a file including size, type, and checksums:\n\n```bash\n# Describe a local file\nfairspec file describe data.csv\n\n# Describe with specific hash type\nfairspec file describe data.csv --hash-type sha256\n\n# Describe a remote file\nfairspec file describe https://example.com/data.csv\n\n# Describe from a dataset\nfairspec file describe --from-dataset dataset.json --from-resource users\n```\n\n### Output\n\nThe describe command returns:\n- `bytes` - File size in bytes\n- `textual` - Whether the file is text-based\n- `integrity` - Hash value and type\n\n### Options\n\n- `--hash-type \u003Ctype>` - Hash algorithm to use\n - Choices: `md5`, `sha1`, `sha256` (default), `sha512`\n- `--from-dataset \u003Cpath>` - Load file from dataset descriptor\n- `--from-resource \u003Cname>` - Specify resource name from dataset\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Example Output\n\n```json\n{\n \"bytes\": 1024,\n \"textual\": true,\n \"integrity\": {\n \"type\": \"sha256\",\n \"hash\": \"a1b2c3d4e5f6...\"\n }\n}\n```\n\n## Validate Files\n\nValidate file integrity using checksums:\n\n```bash\n# Validate with expected hash\nfairspec file validate data.csv --hash a1b2c3d4e5f6 --hash-type sha256\n\n# Validate using MD5\nfairspec file validate data.csv --hash 098f6bcd4621 --hash-type md5\n\n# Output as JSON for automation\nfairspec file validate data.csv --hash a1b2c3d4 --json\n```\n\n### Options\n\n- `--hash \u003Chash>` - Expected file hash\n- `--hash-type \u003Ctype>` - Hash algorithm to use (default: `md5`)\n - Choices: `md5`, `sha1`, `sha256`, `sha512`\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Validation Report\n\nReturns a validation report with:\n- `valid` - Boolean indicating if validation passed\n- `errors` - Array 
of validation errors (if any)\n\nExample error:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"file/integrity\",\n \"hashType\": \"sha256\",\n \"expectedHash\": \"a1b2c3d4e5f6...\",\n \"actualHash\": \"different...\"\n }\n ]\n}\n```\n\n## Infer File Dialect\n\nAutomatically detect the dialect of a file:\n\n```bash\n# Infer dialect from file\nfairspec file infer-dialect data.csv\n\n# Infer from remote file\nfairspec file infer-dialect https://example.com/data.json\n\n# Output as JSON\nfairspec file infer-dialect data.xlsx --json\n```\n\n### Options\n\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Supported Formats\n\nThe command can detect:\n- CSV/TSV files\n- JSON/JSONL files\n- Excel files (.xlsx, .xls)\n- OpenDocument Spreadsheet (.ods)\n- Parquet files\n- Arrow/Feather files\n- SQLite databases\n\n## Working with Datasets\n\nAll file commands support loading files from dataset descriptors:\n\n```bash\n# Describe a resource from a dataset\nfairspec file describe --from-dataset dataset.json --from-resource sales-data\n\n# Copy a resource from a dataset\nfairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csv\n\n# Validate a resource from a dataset\nfairspec file validate --from-dataset dataset.json --from-resource products --hash abc123\n```\n\n## Output Formats\n\n### Text Output (default)\n\nHuman-readable output with colors and formatting:\n\n```bash\nfairspec file describe data.csv\n```\n\n### JSON Output\n\nMachine-readable JSON for automation and scripting:\n\n```bash\nfairspec file describe data.csv --json\n```\n\n### Silent Mode\n\nSuppress all output except errors:\n\n```bash\nfairspec file copy data.csv --to-path output.csv --silent\n```\n\n## Examples\n\n### Copy and Validate\n\n```bash\n# Copy a file and get its hash\nfairspec file copy remote-data.csv --to-path local-data.csv\nfairspec file describe local-data.csv --hash-type 
sha256\n\n# Validate the copied file\nfairspec file validate local-data.csv --hash \u003Chash-from-describe> --hash-type sha256\n```\n\n### Process Dataset Resources\n\n```bash\n# Describe all details of a dataset resource\nfairspec file describe --from-dataset dataset.json --from-resource sales\n\n# Copy the resource locally\nfairspec file copy --from-dataset dataset.json --from-resource sales --to-path sales.csv\n\n# Infer its dialect\nfairspec file infer-dialect sales.csv\n```\n\n### Automation with JSON\n\n```bash\n# Get file info as JSON for scripting\nINFO=$(fairspec file describe data.csv --json)\nHASH=$(echo $INFO | jq -r '.integrity.hash')\n\n# Use in validation\nfairspec file validate data.csv --hash $HASH --hash-type sha256\n```","content/docs/terminal/file.md","5786085547b8aa05",{"html":64,"metadata":65},"\u003Cp>File operations for copying, describing, validating, and analyzing local or remote files.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"available-commands\">Available Commands\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#available-commands\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available Commands”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The \u003Ccode dir=\"auto\">fairspec file\u003C/code> command provides utilities for working with 
files:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">copy\u003C/code> - Copy local or remote files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">describe\u003C/code> - Get file statistics and metadata\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">validate\u003C/code> - Validate file integrity\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer-dialect\u003C/code> - Infer file dialect\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"copy-files\">Copy Files\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#copy-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Copy Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Copy files from local or remote sources to a local destination:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy a local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy a remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">local-data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy from a dataset resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users.csv\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file copy data.csv --to-path output.csvfairspec file copy https://example.com/data.csv --to-path local-data.csvfairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csv\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 
0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--to-path <path>\u003C/code> (required) - Local output path\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-dataset <path>\u003C/code> - Load file from dataset descriptor\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-resource <name>\u003C/code> - Specify resource name from dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"describe-files\">Describe Files\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#describe-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Describe Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Get detailed information about a file including 
size, type, and checksums:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe a local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe with specific hash type\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sha256\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe a remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe from a dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to 
clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file describe data.csvfairspec file describe data.csv --hash-type sha256fairspec file describe https://example.com/data.csvfairspec file describe --from-dataset dataset.json --from-resource users\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"output\">Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The describe command returns:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">bytes\u003C/code> - File size in bytes\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">textual\u003C/code> - Whether the file is text-based\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">integrity\u003C/code> - Hash value and type\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-1\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 
0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--hash-type <type>\u003C/code> - Hash algorithm to use\n\u003Cul>\n\u003Cli>Choices: \u003Ccode dir=\"auto\">md5\u003C/code>, \u003Ccode dir=\"auto\">sha1\u003C/code>, \u003Ccode dir=\"auto\">sha256\u003C/code> (default), \u003Ccode dir=\"auto\">sha512\u003C/code>\u003C/li>\n\u003C/ul>\n\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-dataset <path>\u003C/code> - Load file from dataset descriptor\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-resource <name>\u003C/code> - Specify resource name from dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-output\">Example Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 
0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"bytes\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1024\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"textual\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">true\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"integrity\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">sha256\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"hash\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">a1b2c3d4e5f6...\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "bytes": 1024, "textual": true, "integrity": { "type": "sha256", "hash": "a1b2c3d4e5f6..." }}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-files\">Validate Files\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#validate-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate file integrity 
using checksums:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate with expected hash\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">a1b2c3d4e5f6\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sha256\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate using MD5\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">098f6bcd4621\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">md5\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON for automation\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">a1b2c3d4\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec 
file validate data.csv --hash a1b2c3d4e5f6 --hash-type sha256fairspec file validate data.csv --hash 098f6bcd4621 --hash-type md5fairspec file validate data.csv --hash a1b2c3d4 --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-2\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-2\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--hash <hash>\u003C/code> - Expected file hash\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--hash-type <type>\u003C/code> - Hash algorithm to use (default: \u003Ccode dir=\"auto\">md5\u003C/code>)\n\u003Cul>\n\u003Cli>Choices: \u003Ccode dir=\"auto\">md5\u003C/code>, \u003Ccode dir=\"auto\">sha1\u003C/code>, \u003Ccode dir=\"auto\">sha256\u003C/code>, \u003Ccode dir=\"auto\">sha512\u003C/code>\u003C/li>\n\u003C/ul>\n\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report\">Validation Report\u003C/h3>\u003Ca 
class=\"sl-anchor-link\" href=\"#validation-report\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns a validation report with:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">valid\u003C/code> - Boolean indicating if validation passed\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">errors\u003C/code> - Array of validation errors (if any)\u003C/li>\n\u003C/ul>\n\u003Cp>Example error:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: 
[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">file/integrity\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"hashType\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">sha256\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"expectedHash\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">a1b2c3d4e5f6...\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"actualHash\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">different...\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "file/integrity", "hashType": "sha256", "expectedHash": "a1b2c3d4e5f6...", "actualHash": "different..." 
} ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-file-dialect\">Infer File Dialect\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#infer-file-dialect\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer File Dialect”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically detect the dialect of a file:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer dialect from file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer from remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.xlsx\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file infer-dialect data.csvfairspec file infer-dialect https://example.com/data.jsonfairspec file infer-dialect data.xlsx 
--json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-3\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-3\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"supported-formats\">Supported Formats\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#supported-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 
4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Supported Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The command can detect:\u003C/p>\n\u003Cul>\n\u003Cli>CSV/TSV files\u003C/li>\n\u003Cli>JSON/JSONL files\u003C/li>\n\u003Cli>Excel files (.xlsx, .xls)\u003C/li>\n\u003Cli>OpenDocument Spreadsheet (.ods)\u003C/li>\n\u003Cli>Parquet files\u003C/li>\n\u003Cli>Arrow/Feather files\u003C/li>\n\u003Cli>SQLite databases\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"working-with-datasets\">Working with Datasets\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#working-with-datasets\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Working with Datasets”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>All file commands support loading files from dataset descriptors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe a resource from a dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales-data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy a resource from a dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate a resource from a dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">products\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">abc123\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file describe --from-dataset dataset.json --from-resource sales-datafairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csvfairspec file validate 
--from-dataset dataset.json --from-resource products --hash abc123\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"output-formats\">Output Formats\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#output-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"text-output-default\">Text Output (default)\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#text-output-default\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Text Output (default)”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Human-readable output with 
colors and formatting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file describe data.csv\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-output\">JSON Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON 
Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Machine-readable JSON for automation and scripting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file describe data.csv --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"silent-mode\">Silent Mode\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#silent-mode\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 
.32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Silent Mode”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Suppress all output except errors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file copy data.csv --to-path output.csv --silent\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"examples\">Examples\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#examples\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" 
d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Examples”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"copy-and-validate\">Copy and Validate\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#copy-and-validate\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Copy and Validate”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy a file and get its hash\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">remote-data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">local-data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">local-data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sha256\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate the copied file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">local-data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\"><hash-from-describe>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sha256\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file copy remote-data.csv --to-path local-data.csvfairspec file describe local-data.csv --hash-type sha256fairspec file validate local-data.csv --hash \u003Chash-from-describe> --hash-type sha256\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"process-dataset-resources\">Process Dataset Resources\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#process-dataset-resources\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Process Dataset Resources”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe all details of a dataset resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy the resource locally\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer its dialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec file describe --from-dataset dataset.json --from-resource salesfairspec file copy --from-dataset dataset.json --from-resource sales --to-path sales.csvfairspec file infer-dialect 
sales.csv\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"automation-with-json\">Automation with JSON\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#automation-with-json\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Automation with JSON”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Get file info as JSON for scripting\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">INFO\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">$(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">HASH\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">$(\u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$INFO\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">|\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">jq\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">-r\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">.integrity.hash\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Use in validation\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$HASH\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--hash-type\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sha256\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"INFO=$(fairspec file describe data.csv --json)HASH=$(echo $INFO | jq -r '.integrity.hash')fairspec file validate data.csv --hash $HASH --hash-type sha256\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":66,"localImagePaths":129,"remoteImagePaths":130,"frontmatter":131,"imagePaths":133},[67,70,73,77,80,83,85,88,91,93,96,99,101,104,107,110,113,116,119,120,123,126],{"depth":31,"slug":68,"text":69},"available-commands","Available Commands",{"depth":31,"slug":71,"text":72},"copy-files","Copy Files",{"depth":74,"slug":75,"text":76},3,"options","Options",{"depth":31,"slug":78,"text":79},"describe-files","Describe Files",{"depth":74,"slug":81,"text":82},"output","Output",{"depth":74,"slug":84,"text":76},"options-1",{"depth":74,"slug":86,"text":87},"example-output","Example Output",{"depth":31,"slug":89,"text":90},"validate-files","Validate Files",{"depth":74,"slug":92,"text":76},"options-2",{"depth":74,"slug":94,"text":95},"validation-report","Validation Report",{"depth":31,"slug":97,"text":98},"infer-file-dialect","Infer File 
Dialect",{"depth":74,"slug":100,"text":76},"options-3",{"depth":74,"slug":102,"text":103},"supported-formats","Supported Formats",{"depth":31,"slug":105,"text":106},"working-with-datasets","Working with Datasets",{"depth":31,"slug":108,"text":109},"output-formats","Output Formats",{"depth":74,"slug":111,"text":112},"text-output-default","Text Output (default)",{"depth":74,"slug":114,"text":115},"json-output","JSON Output",{"depth":74,"slug":117,"text":118},"silent-mode","Silent Mode",{"depth":31,"slug":41,"text":42},{"depth":74,"slug":121,"text":122},"copy-and-validate","Copy and Validate",{"depth":74,"slug":124,"text":125},"process-dataset-resources","Process Dataset Resources",{"depth":74,"slug":127,"text":128},"automation-with-json","Automation with JSON",[],[],{"title":54,"sidebar":132},{"order":57,"label":58},[],"terminal/data",{"id":134,"data":136,"body":142,"filePath":143,"digest":144,"rendered":145},{"title":137,"editUrl":15,"head":138,"template":17,"sidebar":139,"pagefind":15,"draft":21},"Working with JSON Data in Terminal",[],{"order":74,"label":140,"hidden":21,"attrs":141},"Data",{},"JSON data validation and schema operations using JSON Schema standards.\n\n## Available Commands\n\nThe `fairspec data` command provides utilities for working with JSON data:\n\n- `validate` - Validate JSON data against a Data Schema (JSON Schema)\n- `infer-schema` - Automatically generate a Data Schema from JSON data\n- `validate-schema` - Validate a Data Schema itself\n- `infer-dialect` - Infer file dialect\n\n## Validate JSON Data\n\nValidate JSON data files against a Data Schema (JSON Schema):\n\n```bash\n# Validate JSON data with a schema\nfairspec data validate data.json --schema schema.json\n\n# Validate from a remote source\nfairspec data validate https://example.com/data.json --schema schema.json\n\n# Output validation report as JSON\nfairspec data validate data.json --schema schema.json --json\n```\n\n### Options\n\n- `--schema \u003Cpath>` (required) - Path to a 
Data Schema descriptor (JSON Schema)\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Validation Report\n\nReturns a validation report with:\n- `valid` - Boolean indicating if validation passed\n- `errors` - Array of validation errors (if any)\n\nExample validation errors:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"data\",\n \"instancePath\": \"/users/0/email\",\n \"schemaPath\": \"#/properties/users/items/properties/email/format\",\n \"keyword\": \"format\",\n \"message\": \"must match format \\\"email\\\"\"\n }\n ]\n}\n```\n\n### Example Usage\n\nCreate a JSON Schema file (`user-schema.json`):\n```json\n{\n \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"email\": { \"type\": \"string\", \"format\": \"email\" },\n \"age\": { \"type\": \"integer\", \"minimum\": 0 }\n },\n \"required\": [\"name\", \"email\"]\n}\n```\n\nValidate data against the schema:\n```bash\nfairspec data validate user.json --schema user-schema.json\n```\n\n## Infer Data Schema\n\nAutomatically generate a Data Schema (JSON Schema) from JSON data:\n\n```bash\n# Infer schema from local file\nfairspec data infer-schema data.json\n\n# Infer schema from remote file\nfairspec data infer-schema https://example.com/data.json\n\n# Save inferred schema to file\nfairspec data infer-schema data.json --json > schema.json\n\n# Output for human reading\nfairspec data infer-schema data.json\n```\n\n### Options\n\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Generated Schema\n\nThe inferred schema will automatically detect:\n- Data types (string, number, integer, boolean, null)\n- Object structures and nested properties\n- Array items and their types\n- Required properties based on presence\n- Enum values for properties with limited options\n\n### 
Example\n\nGiven this JSON data (`users.json`):\n```json\n[\n {\n \"id\": 1,\n \"name\": \"Alice\",\n \"email\": \"alice@example.com\",\n \"age\": 30,\n \"active\": true\n },\n {\n \"id\": 2,\n \"name\": \"Bob\",\n \"email\": \"bob@example.com\",\n \"age\": 25,\n \"active\": false\n }\n]\n```\n\nInfer the schema:\n```bash\nfairspec data infer-schema users.json --json\n```\n\nGenerated schema:\n```json\n{\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"id\": { \"type\": \"integer\" },\n \"name\": { \"type\": \"string\" },\n \"email\": { \"type\": \"string\" },\n \"age\": { \"type\": \"integer\" },\n \"active\": { \"type\": \"boolean\" }\n },\n \"required\": [\"id\", \"name\", \"email\", \"age\", \"active\"]\n }\n}\n```\n\n## Validate Data Schema\n\nValidate that a Data Schema (JSON Schema) file is valid:\n\n```bash\n# Validate a schema file\nfairspec data validate-schema schema.json\n\n# Validate from remote source\nfairspec data validate-schema https://example.com/schema.json\n\n# Output as JSON\nfairspec data validate-schema schema.json --json\n```\n\n### Options\n\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Schema Validation\n\nThis validates that the schema itself is:\n- Valid JSON\n- Compliant with JSON Schema Draft 2020-12 specification\n- Has correct property definitions\n- Uses valid keywords and formats\n\n### Validation Report\n\n```json\n{\n \"valid\": true,\n \"errors\": []\n}\n```\n\nOr if invalid:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"schema/invalid\",\n \"message\": \"Invalid schema property: 'typ' (did you mean 'type'?)\"\n }\n ]\n}\n```\n\n## Infer File Dialect\n\nAutomatically detect the dialect of a data file:\n\n```bash\n# Infer dialect from file\nfairspec data infer-dialect data.json\n\n# Infer from remote file\nfairspec data infer-dialect https://example.com/data.jsonl\n\n# Output as JSON\nfairspec data 
infer-dialect data.json --json\n```\n\n### Options\n\n- `--sample-bytes \u003Cbytes>` - Sample size in bytes for file dialect detection\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Detected Formats\n\nThe command can detect:\n- `json` - Standard JSON format\n- `jsonl` - JSON Lines (newline-delimited JSON)\n\n### Example Output\n\n```json\n{\n \"name\": \"json\"\n}\n```\n\nOr for JSONL:\n```json\n{\n \"name\": \"jsonl\"\n}\n```\n\n## Common Workflows\n\n### Create and Validate with Schema\n\n```bash\n# 1. Infer schema from existing data\nfairspec data infer-schema sample-data.json --json > data-schema.json\n\n# 2. Validate new data against the schema\nfairspec data validate new-data.json --schema data-schema.json\n\n# 3. Check if validation passed\nif [ $? -eq 0 ]; then\n echo \"Data is valid!\"\nelse\n echo \"Data validation failed\"\nfi\n```\n\n### Schema-Driven Development\n\n```bash\n# 1. Create a schema for your data structure\ncat > api-schema.json \u003C\u003C 'EOF'\n{\n \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n \"type\": \"object\",\n \"properties\": {\n \"users\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"id\": { \"type\": \"integer\" },\n \"username\": { \"type\": \"string\", \"minLength\": 3 },\n \"email\": { \"type\": \"string\", \"format\": \"email\" }\n },\n \"required\": [\"id\", \"username\", \"email\"]\n }\n }\n }\n}\nEOF\n\n# 2. Validate the schema itself\nfairspec data validate-schema api-schema.json\n\n# 3. 
Validate API responses against the schema\nfairspec data validate response.json --schema api-schema.json\n```\n\n### Automated Testing\n\n```bash\n# Validate data in a test script\nfor file in test-data/*.json; do\n echo \"Validating $file...\"\n if fairspec data validate \"$file\" --schema schema.json --silent; then\n echo \"✓ $file is valid\"\n else\n echo \"✗ $file failed validation\"\n exit 1\n fi\ndone\n```\n\n## Output Formats\n\n### Text Output (default)\n\nHuman-readable output with colors and formatting:\n\n```bash\nfairspec data validate data.json --schema schema.json\n```\n\nOutput:\n```\n✓ Data is valid\n```\n\nOr with errors:\n```\n✗ Data validation failed\n\nErrors:\n • /users/0/email: must match format \"email\"\n • /users/1/age: must be >= 0\n```\n\n### JSON Output\n\nMachine-readable JSON for automation and scripting:\n\n```bash\nfairspec data validate data.json --schema schema.json --json\n```\n\n### Silent Mode\n\nSuppress all output except errors:\n\n```bash\nfairspec data validate data.json --schema schema.json --silent\n```\n\nUse exit code to check success:\n```bash\nif fairspec data validate data.json --schema schema.json --silent; then\n echo \"Valid\"\nelse\n echo \"Invalid\"\nfi\n```\n\n## Examples\n\n### API Response Validation\n\n```bash\n# Fetch API response and validate\ncurl -s https://api.example.com/users > response.json\nfairspec data infer-schema response.json --json > api-schema.json\n\n# Validate future responses\ncurl -s https://api.example.com/users | \\\n fairspec data validate /dev/stdin --schema api-schema.json\n```\n\n### Configuration File Validation\n\n```bash\n# Create schema for config files\ncat > config-schema.json \u003C\u003C 'EOF'\n{\n \"type\": \"object\",\n \"properties\": {\n \"host\": { \"type\": \"string\" },\n \"port\": { \"type\": \"integer\", \"minimum\": 1, \"maximum\": 65535 },\n \"ssl\": { \"type\": \"boolean\" }\n },\n \"required\": [\"host\", \"port\"]\n}\nEOF\n\n# Validate config file\nfairspec data 
validate config.json --schema config-schema.json\n```\n\n### Data Pipeline Validation\n\n```bash\n# Validate input data\nfairspec data validate input.json --schema input-schema.json\n\n# Process data (your custom script)\n./process-data.sh input.json output.json\n\n# Validate output data\nfairspec data validate output.json --schema output-schema.json\n```\n\n### Schema Evolution\n\n```bash\n# Start with inferred schema from v1 data\nfairspec data infer-schema data-v1.json --json > schema-v1.json\n\n# Manually update schema for v2 (add optional properties)\n# Edit schema-v1.json -> schema-v2.json\n\n# Validate that v2 schema is still valid\nfairspec data validate-schema schema-v2.json\n\n# Ensure v1 data is still compatible with v2 schema\nfairspec data validate data-v1.json --schema schema-v2.json\n```","content/docs/terminal/data.md","b82a1b470c384a79",{"html":146,"metadata":147},"\u003Cp>JSON data validation and schema operations using JSON Schema standards.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"available-commands\">Available Commands\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#available-commands\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available Commands”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The \u003Ccode dir=\"auto\">fairspec data\u003C/code> command provides utilities for 
working with JSON data:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">validate\u003C/code> - Validate JSON data against a Data Schema (JSON Schema)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer-schema\u003C/code> - Automatically generate a Data Schema from JSON data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">validate-schema\u003C/code> - Validate a Data Schema itself\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer-dialect\u003C/code> - Infer file dialect\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-json-data\">Validate JSON Data\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#validate-json-data\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate JSON Data”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate JSON data files against a Data Schema (JSON Schema):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># Validate JSON data with a schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate from a remote source\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output validation report as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate data.json --schema schema.jsonfairspec data validate https://example.com/data.json --schema schema.jsonfairspec data validate data.json --schema schema.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 
0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--schema <path>\u003C/code> (required) - Path to a Data Schema descriptor (JSON Schema)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report\">Validation Report\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#validation-report\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns a validation report with:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">valid\u003C/code> - Boolean indicating if validation passed\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">errors\u003C/code> - Array of validation errors (if any)\u003C/li>\n\u003C/ul>\n\u003Cp>Example validation errors:\u003C/p>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"instancePath\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">/users/0/email\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"schemaPath\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">#/properties/users/items/properties/email/format\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"keyword\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">format\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">must match format \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "data", "instancePath": "/users/0/email", "schemaPath": "#/properties/users/items/properties/email/format", "keyword": "format", "message": "must match format \\"email\\"" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-usage\">Example Usage\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Create a JSON Schema file (\u003Ccode dir=\"auto\">user-schema.json\u003C/code>):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"$schema\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">https://json-schema.org/draft/2020-12/schema\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">object\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"properties\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"email\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"format\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"minimum\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"required\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "name": { "type": "string" }, "email": { "type": "string", "format": "email" }, "age": { "type": "integer", "minimum": 0 } }, "required": ["name", "email"]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Validate data against the schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">user.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">user-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate user.json --schema user-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-data-schema\">Infer Data Schema\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#infer-data-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer Data Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically generate a Data Schema (JSON Schema) from JSON data:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan 
class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer schema from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer schema from remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save inferred schema to file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output for human reading\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data infer-schema data.jsonfairspec data infer-schema https://example.com/data.jsonfairspec data infer-schema data.json --json > schema.jsonfairspec data infer-schema data.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-1\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" 
href=\"#options-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"generated-schema\">Generated Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#generated-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Generated Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The inferred schema will automatically 
detect:\u003C/p>\n\u003Cul>\n\u003Cli>Data types (string, number, integer, boolean, null)\u003C/li>\n\u003Cli>Object structures and nested properties\u003C/li>\n\u003Cli>Array items and their types\u003C/li>\n\u003Cli>Required properties based on presence\u003C/li>\n\u003Cli>Enum values for properties with limited options\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example\">Example\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Given this JSON data (\u003Ccode dir=\"auto\">users.json\u003C/code>):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Alice\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"email\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">alice@example.com\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">30\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"active\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">true\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Bob\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"email\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">bob@example.com\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">25\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"active\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"[ { "id": 1, "name": "Alice", "email": "alice@example.com", "age": 30, "active": true }, { "id": 2, "name": "Bob", "email": "bob@example.com", "age": 25, "active": false }]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Infer the schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users.json\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data infer-schema users.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Generated schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">array\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"items\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">object\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"properties\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"email\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"active\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">boolean\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"required\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "type": "array", "items": { "type": "object", "properties": { "id": { "type": "integer" }, "name": { "type": "string" }, "email": { "type": "string" }, "age": { "type": "integer" }, "active": { "type": "boolean" } }, "required": ["id", "name", "email", "age", "active"] }}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-data-schema\">Validate Data Schema\u003C/h2>\u003Ca class=\"sl-anchor-link\" 
href=\"#validate-data-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate Data Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate that a Data Schema (JSON Schema) file is valid:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate a schema file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate from remote 
source\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate-schema schema.jsonfairspec data validate-schema https://example.com/schema.jsonfairspec data validate-schema schema.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-2\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-2\">\u003Cspan 
aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-validation\">Schema Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>This validates that the schema itself 
is:\u003C/p>\n\u003Cul>\n\u003Cli>Valid JSON\u003C/li>\n\u003Cli>Compliant with JSON Schema Draft 2020-12 specification\u003C/li>\n\u003Cli>Has correct property definitions\u003C/li>\n\u003Cli>Uses valid keywords and formats\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report-1\">Validation Report\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#validation-report-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">true\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: []\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": true, "errors": []}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Or if invalid:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">schema/invalid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Invalid schema property: 'typ' (did you mean 'type'?)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "schema/invalid", "message": "Invalid schema property: 'typ' (did you mean 'type'?)" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-file-dialect\">Infer File Dialect\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#infer-file-dialect\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath 
fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer File Dialect”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically detect the dialect of a data file:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer dialect from file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer from remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.jsonl\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data infer-dialect data.jsonfairspec data infer-dialect https://example.com/data.jsonlfairspec data infer-dialect data.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-3\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-3\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 
0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--sample-bytes <bytes>\u003C/code> - Sample size in bytes for file dialect detection\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"detected-formats\">Detected Formats\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#detected-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Detected Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The command can detect:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">json\u003C/code> - Standard JSON format\u003C/li>\n\u003Cli>\u003Ccode 
dir=\"auto\">jsonl\u003C/code> - JSON Lines (newline-delimited JSON)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-output\">Example Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" 
data-copied=\"Copied!\" data-code=\"{ "name": "json"}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Or for JSONL:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "name": "jsonl"}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"common-workflows\">Common Workflows\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#common-workflows\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 
.32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Common Workflows”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"create-and-validate-with-schema\">Create and Validate with Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#create-and-validate-with-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Create and Validate with Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. 
Infer schema from existing data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sample-data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Validate new data against the schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">new-data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. 
Check if validation passed\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> [ \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">$?\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">-eq\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> ]; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Data is valid!\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">else\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Data validation failed\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data infer-schema 
sample-data.json --json > data-schema.jsonfairspec data validate new-data.json --schema data-schema.jsonif [ $? -eq 0 ]; then echo "Data is valid!"else echo "Data validation failed"fi\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-driven-development\">Schema-Driven Development\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-driven-development\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema-Driven Development”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. 
Create a schema for your data structure\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">cat\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">api-schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\"><<\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'EOF'\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"type\": \"object\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"properties\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"users\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> 
\u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"type\": \"array\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"items\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"type\": \"object\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"properties\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"id\": { \"type\": \"integer\" },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"username\": { \"type\": \"string\", \"minLength\": 3 },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"email\": { \"type\": \"string\", \"format\": \"email\" }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"required\": [\"id\", \"username\", \"email\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">EOF\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Validate the schema itself\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">api-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. Validate API responses against the schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">response.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">api-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"cat > api-schema.json \u003C\u003C 'EOF'{ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "users": { "type": "array", "items": { "type": "object", "properties": { "id": { "type": "integer" 
}, "username": { "type": "string", "minLength": 3 }, "email": { "type": "string", "format": "email" } }, "required": ["id", "username", "email"] } } }}EOFfairspec data validate-schema api-schema.jsonfairspec data validate response.json --schema api-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"automated-testing\">Automated Testing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#automated-testing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Automated Testing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate data in a test script\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">for\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">file\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">in\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">test-data/*.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">do\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Validating \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$file\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">...\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$file\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✓ \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$file\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> is valid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">else\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✗ \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$file\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> failed validation\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">exit\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">done\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"for file in test-data/*.json; do echo "Validating $file..." if fairspec data validate "$file" --schema schema.json --silent; then echo "✓ $file is valid" else echo "✗ $file failed validation" exit 1 fidone\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"output-formats\">Output Formats\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#output-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"text-output-default\">Text Output (default)\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#text-output-default\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 
0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Text Output (default)”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Human-readable output with colors and formatting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate data.json --schema schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Output:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">✓ Data is valid\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"✓ Data is valid\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Or with errors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">✗ Data validation failed\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">Errors:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">• /users/0/email: must match format \"email\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">• /users/1/age: must be >= 0\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"✗ Data validation failedErrors: • /users/0/email: must match format "email" • /users/1/age: must be >= 0\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-output\">JSON Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-output\">\u003Cspan 
aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Machine-readable JSON for automation and scripting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv 
class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate data.json --schema schema.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"silent-mode\">Silent Mode\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#silent-mode\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Silent Mode”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Suppress all output except errors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate data.json --schema schema.json --silent\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Use exit code to check success:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Valid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">else\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Invalid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"if fairspec data validate data.json --schema schema.json --silent; then echo "Valid"else echo "Invalid"fi\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"examples\">Examples\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#examples\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 
0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Examples”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"api-response-validation\">API Response Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#api-response-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “API Response Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Fetch API response and validate\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">curl\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">-s\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://api.example.com/users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">response.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">response.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">api-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate future responses\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">curl\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">-s\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">https://api.example.com/users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">|\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">/dev/stdin\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">api-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"curl -s https://api.example.com/users > response.jsonfairspec data infer-schema response.json --json > api-schema.jsoncurl -s https://api.example.com/users | \\ fairspec data validate /dev/stdin --schema api-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"configuration-file-validation\">Configuration File Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#configuration-file-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 
2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Configuration File Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Create schema for config files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">cat\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">config-schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\"><<\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'EOF'\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"type\": 
\"object\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"properties\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"host\": { \"type\": \"string\" },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"port\": { \"type\": \"integer\", \"minimum\": 1, \"maximum\": 65535 },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"ssl\": { \"type\": \"boolean\" }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"required\": [\"host\", \"port\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">EOF\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate config file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">config.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">config-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"cat > config-schema.json \u003C\u003C 'EOF'{ "type": "object", "properties": { "host": { "type": "string" }, "port": { "type": "integer", "minimum": 1, "maximum": 65535 }, "ssl": { "type": "boolean" } }, "required": ["host", "port"]}EOFfairspec data validate config.json --schema config-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"data-pipeline-validation\">Data Pipeline Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#data-pipeline-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 
3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Data Pipeline Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate input data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">input.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">input-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Process data (your custom script)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">./process-data.sh\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">input.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate output data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data validate input.json --schema input-schema.json./process-data.sh input.json output.jsonfairspec data validate output.json --schema output-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-evolution\">Schema Evolution\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-evolution\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 
3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Evolution”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Start with inferred schema from v1 data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data-v1.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema-v1.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Manually update schema for v2 (add optional properties)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Edit schema-v1.json -> schema-v2.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate that v2 schema is still valid\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema-v2.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Ensure v1 data is still compatible with v2 schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data-v1.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--schema\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema-v2.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec data infer-schema data-v1.json --json > schema-v1.jsonfairspec data validate-schema schema-v2.jsonfairspec data validate data-v1.json --schema schema-v2.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":148,"localImagePaths":212,"remoteImagePaths":213,"frontmatter":214,"imagePaths":216},[149,150,153,154,155,158,161,162,165,168,171,172,175,177,178,179,182,183,186,189,192,195,196,197,198,199,200,203,206,209],{"depth":31,"slug":68,"text":69},{"depth":31,"slug":151,"text":152},"validate-json-data","Validate JSON Data",{"depth":74,"slug":75,"text":76},{"depth":74,"slug":94,"text":95},{"depth":74,"slug":156,"text":157},"example-usage","Example Usage",{"depth":31,"slug":159,"text":160},"infer-data-schema","Infer Data Schema",{"depth":74,"slug":84,"text":76},{"depth":74,"slug":163,"text":164},"generated-schema","Generated Schema",{"depth":74,"slug":166,"text":167},"example","Example",{"depth":31,"slug":169,"text":170},"validate-data-schema","Validate Data Schema",{"depth":74,"slug":92,"text":76},{"depth":74,"slug":173,"text":174},"schema-validation","Schema Validation",{"depth":74,"slug":176,"text":95},"validation-report-1",{"depth":31,"slug":97,"text":98},{"depth":74,"slug":100,"text":76},{"depth":74,"slug":180,"text":181},"detected-formats","Detected Formats",{"depth":74,"slug":86,"text":87},{"depth":31,"slug":184,"text":185},"common-workflows","Common Workflows",{"depth":74,"slug":187,"text":188},"create-and-validate-with-schema","Create and Validate with Schema",{"depth":74,"slug":190,"text":191},"schema-driven-development","Schema-Driven Development",{"depth":74,"slug":193,"text":194},"automated-testing","Automated 
Testing",{"depth":31,"slug":108,"text":109},{"depth":74,"slug":111,"text":112},{"depth":74,"slug":114,"text":115},{"depth":74,"slug":117,"text":118},{"depth":31,"slug":41,"text":42},{"depth":74,"slug":201,"text":202},"api-response-validation","API Response Validation",{"depth":74,"slug":204,"text":205},"configuration-file-validation","Configuration File Validation",{"depth":74,"slug":207,"text":208},"data-pipeline-validation","Data Pipeline Validation",{"depth":74,"slug":210,"text":211},"schema-evolution","Schema Evolution",[],[],{"title":137,"sidebar":215},{"order":74,"label":140},[],"terminal/table",{"id":217,"data":219,"body":225,"filePath":226,"digest":227,"rendered":228},{"title":220,"editUrl":15,"head":221,"template":17,"sidebar":222,"pagefind":15,"draft":21},"Working with Tables in Terminal",[],{"order":31,"label":223,"hidden":21,"attrs":224},"Table",{},"Table operations including querying, validation, statistics, and schema management for tabular data files.\n\n## Available Commands\n\nThe `fairspec table` command provides utilities for working with tables:\n\n- `describe` - Get table statistics and summary information\n- `query` - Query tables using SQL syntax\n- `validate` - Validate table data against a Table Schema\n- `infer-schema` - Automatically infer Table Schema from table data\n- `render-schema` - Render Table Schema as HTML or Markdown documentation\n- `validate-schema` - Validate a Table Schema file\n- `infer-dialect` - Infer file dialect\n- `script` - Interactive REPL session with loaded table\n\n## Describe Tables\n\nGet statistical summary information about a table:\n\n```bash\n# Describe a CSV file\nfairspec table describe data.csv\n\n# Describe a remote table\nfairspec table describe https://example.com/data.csv\n\n# Describe from a dataset\nfairspec table describe --from-dataset dataset.json --from-resource sales\n\n# Output as JSON\nfairspec table describe data.csv --json\n```\n\n### Output\n\nReturns statistics for each column 
including:\n- `count` - Number of non-null values\n- `null_count` - Number of null values\n- `mean` - Average value (numeric columns)\n- `std` - Standard deviation (numeric columns)\n- `min` - Minimum value\n- `max` - Maximum value\n- `median` - Median value (numeric columns)\n\n### Options\n\n- `--from-dataset \u003Cpath>` - Load table from dataset descriptor\n- `--from-resource \u003Cname>` - Specify resource name from dataset\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Format Options\n\nAll standard format options are available (see Format Options section below).\n\n## Query Tables\n\nExecute SQL queries on tables using Polars SQL engine:\n\n```bash\n# Basic query\nfairspec table query data.csv \"SELECT * FROM self WHERE age > 25\"\n\n# Aggregate data\nfairspec table query sales.csv \"SELECT region, SUM(amount) as total FROM self GROUP BY region\"\n\n# Filter and sort\nfairspec table query users.csv \"SELECT name, email FROM self WHERE active = true ORDER BY name\"\n\n# Query from dataset resource\nfairspec table query --from-dataset dataset.json --from-resource users \\\n \"SELECT * FROM self WHERE created_at > '2024-01-01'\"\n```\n\n### SQL Syntax\n\n- Use `self` as the table name in queries\n- Supports SELECT, WHERE, GROUP BY, ORDER BY, LIMIT, JOIN, etc.\n- Full Polars SQL syntax supported\n- Results are output as formatted tables\n\n### Options\n\n- `--from-dataset \u003Cpath>` - Load table from dataset descriptor\n- `--from-resource \u003Cname>` - Specify resource name from dataset\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n## Validate Tables\n\nValidate table data against a Table Schema:\n\n```bash\n# Validate with explicit schema\nfairspec table validate data.csv --table-schema schema.json\n\n# Validate with inferred schema\nfairspec table validate data.csv\n\n# Validate from dataset (uses embedded schema)\nfairspec table validate --from-dataset dataset.json --from-resource users\n\n# Output 
validation report as JSON\nfairspec table validate data.csv --table-schema schema.json --json\n```\n\n### Validation Report\n\nReturns a validation report with:\n- `valid` - Boolean indicating if validation passed\n- `errors` - Array of validation errors (if any)\n\nExample validation errors:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"table/constraint\",\n \"propertyName\": \"age\",\n \"rowNumber\": 5,\n \"message\": \"value 200 exceeds maximum of 150\"\n },\n {\n \"type\": \"table/type\",\n \"propertyName\": \"email\",\n \"rowNumber\": 12,\n \"message\": \"invalid email format\"\n }\n ]\n}\n```\n\n### Options\n\n- `--table-schema \u003Cpath>` - Path to Table Schema file\n- `--from-dataset \u003Cpath>` - Load table from dataset descriptor\n- `--from-resource \u003Cname>` - Specify resource name from dataset\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n## Infer Table Schema\n\nAutomatically generate a Table Schema from table data:\n\n```bash\n# Infer schema from local file\nfairspec table infer-schema data.csv\n\n# Infer from remote file\nfairspec table infer-schema https://example.com/data.csv\n\n# Save inferred schema to file\nfairspec table infer-schema data.csv --json > schema.json\n\n# Infer with custom options\nfairspec table infer-schema data.csv --sample-rows 1000 --confidence 0.95\n```\n\n### Schema Inference Options\n\n- `--sample-rows \u003Cnumber>` - Number of rows to sample for inference (default: 100)\n- `--confidence \u003Cnumber>` - Confidence threshold for type detection (0-1, default: 0.9)\n- `--keep-strings` - Keep original string types instead of inferring\n- `--column-types \u003Cjson>` - Override types for specific columns\n- `--comma-decimal` - Treat comma as decimal separator\n- `--month-first` - Parse dates as month-first (MM/DD/YYYY)\n\n### Generated Schema\n\nThe inferred schema automatically detects:\n- Column types (string, integer, number, boolean, date, datetime, etc.)\n- Required columns 
based on presence\n- Enum values for columns with limited distinct values\n- Numeric constraints (minimum, maximum)\n- String patterns\n- Missing value indicators\n\n### Example\n\nGiven this CSV data:\n```csv\nid,name,price,quantity,active,created_at\n1,Product A,19.99,100,true,2024-01-15\n2,Product B,29.99,50,false,2024-01-20\n3,Product C,39.99,75,true,2024-02-01\n```\n\nInfer the schema:\n```bash\nfairspec table infer-schema products.csv --json\n```\n\nGenerated schema:\n```json\n{\n \"properties\": {\n \"id\": { \"type\": \"integer\" },\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"quantity\": { \"type\": \"integer\" },\n \"active\": { \"type\": \"boolean\" },\n \"created_at\": { \"type\": \"date\" }\n },\n \"required\": [\"id\", \"name\", \"price\", \"quantity\", \"active\", \"created_at\"]\n}\n```\n\n## Render Table Schema\n\nRender a Table Schema as human-readable HTML or Markdown documentation:\n\n```bash\n# Render as Markdown\nfairspec table render-schema schema.json --to-format markdown\n\n# Render as HTML\nfairspec table render-schema schema.json --to-format html\n\n# Save to file\nfairspec table render-schema schema.json --to-format markdown --to-path schema.md\nfairspec table render-schema schema.json --to-format html --to-path schema.html\n```\n\n### Output Formats\n\n- `markdown` - Generates Markdown documentation with column descriptions, types, and constraints\n- `html` - Generates styled HTML table documentation\n\n### Options\n\n- `--to-format \u003Cformat>` (required) - Output format (markdown or html)\n- `--to-path \u003Cpath>` - Save to file instead of stdout\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n\n## Validate Table Schema\n\nValidate that a Table Schema file is valid:\n\n```bash\n# Validate a schema file\nfairspec table validate-schema schema.json\n\n# Validate from remote source\nfairspec table validate-schema https://example.com/schema.json\n\n# Output as 
JSON\nfairspec table validate-schema schema.json --json\n```\n\n### Schema Validation\n\nThis validates that the schema itself is:\n- Valid JSON\n- Compliant with Table Schema specification\n- Has correct property definitions\n- Uses valid column types and constraints\n\n### Validation Report\n\n```json\n{\n \"valid\": true,\n \"errors\": []\n}\n```\n\nOr if invalid:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"schema/invalid\",\n \"message\": \"Invalid column type: 'txt' (did you mean 'text'?)\"\n }\n ]\n}\n```\n\n### Options\n\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n## Infer File Dialect\n\nAutomatically detect the dialect of a table file:\n\n```bash\n# Infer dialect from file\nfairspec table infer-dialect data.csv\n\n# Infer from remote file\nfairspec table infer-dialect https://example.com/data.xlsx\n\n# Output as JSON\nfairspec table infer-dialect data.parquet --json\n```\n\n### Detected Formats\n\nThe command can detect:\n- `csv` - Comma-separated values\n- `tsv` - Tab-separated values\n- `json` - JSON format\n- `jsonl` - JSON Lines (newline-delimited JSON)\n- `xlsx` - Excel spreadsheet\n- `ods` - OpenDocument Spreadsheet\n- `parquet` - Apache Parquet\n- `arrow` - Apache Arrow/Feather\n- `sqlite` - SQLite database\n\n### Example Output\n\n```json\n{\n \"name\": \"csv\",\n \"delimiter\": \",\",\n \"quoteChar\": \"\\\"\"\n}\n```\n\n## Interactive Scripting\n\nStart an interactive REPL session with a loaded table:\n\n```bash\n# Load table and start REPL\nfairspec table script data.csv\n\n# Script table from dataset\nfairspec table script --from-dataset dataset.json --from-resource users\n```\n\n### Available in Session\n\n- `fairspec` - Full fairspec library\n- `table` - Loaded table (LazyFrame)\n\n### Example Session\n\n```python\nfairspec> table\nLazyFrame { ... }\n\nfairspec> table.collect()\nDataFrame { ... 
}\n\nfairspec> table.select([\"name\", \"age\"]).collect()\nDataFrame { ... }\n\nfairspec> table.filter(pl.col(\"age\").gt(25)).collect()\nDataFrame { ... }\n```\n\n## Format Options\n\nAll table commands support these format options for loading data:\n\n### CSV/TSV Options\n\n- `--format \u003Cname>` - Format name (csv, tsv, etc.)\n- `--delimiter \u003Cchar>` - Column delimiter (default: `,`)\n- `--line-terminator \u003Cchars>` - Row terminator (default: `\\n`)\n- `--quote-char \u003Cchar>` - Quote character (default: `\"`)\n- `--null-sequence \u003Cstring>` - Null value indicator\n- `--header-rows \u003Cnumbers>` - Header row indices (e.g., `[1,2]`)\n- `--header-join \u003Cchar>` - Character to join multi-row headers\n- `--comment-rows \u003Cnumbers>` - Comment row indices to skip\n- `--comment-prefix \u003Cchar>` - Comment line prefix (e.g., `#`)\n- `--column-names \u003Cnames>` - Override column names (JSON array)\n\n### JSON Options\n\n- `--json-pointer \u003Cpointer>` - JSON pointer to data array (e.g., `/data/users`)\n- `--row-type \u003Ctype>` - Row format: `object` or `array`\n\n### Excel/ODS Options\n\n- `--sheet-number \u003Cnumber>` - Sheet index (0-based)\n- `--sheet-name \u003Cname>` - Sheet name\n\n### SQLite Options\n\n- `--table-name \u003Cname>` - Table name in database\n\n## Table Schema Options\n\nAll table commands support these schema-related options:\n\n### Type Inference\n\n- `--sample-rows \u003Cnumber>` - Sample size for type inference\n- `--confidence \u003Cnumber>` - Confidence threshold (0-1)\n- `--keep-strings` - Don't infer types, keep as strings\n- `--column-types \u003Cjson>` - Override types (e.g., `{\"age\":\"integer\"}`)\n\n### Value Parsing\n\n- `--missing-values \u003Cvalues>` - Missing value indicators (JSON array)\n- `--decimal-char \u003Cchar>` - Decimal separator (default: `.`)\n- `--group-char \u003Cchar>` - Thousands separator (default: `,`)\n- `--comma-decimal` - Use comma as decimal (shorthand)\n- `--true-values 
\u003Cvalues>` - Custom true values (JSON array)\n- `--false-values \u003Cvalues>` - Custom false values (JSON array)\n\n### Date/Time Parsing\n\n- `--datetime-format \u003Cformat>` - Datetime format string\n- `--date-format \u003Cformat>` - Date format string\n- `--time-format \u003Cformat>` - Time format string\n- `--month-first` - Parse dates as month-first\n\n### Array/List Parsing\n\n- `--array-type \u003Ctype>` - Array item type\n- `--list-delimiter \u003Cchar>` - List delimiter (default: `;`)\n- `--list-item-type \u003Ctype>` - List item type\n\n## Common Workflows\n\n### Explore Unknown Data\n\n```bash\n# 1. Infer the dialect\nfairspec table infer-dialect unknown-data.txt\n\n# 2. Get basic statistics\nfairspec table describe unknown-data.txt\n\n# 3. Infer the schema\nfairspec table infer-schema unknown-data.txt --json > schema.json\n\n# 4. Query the data\nfairspec table query unknown-data.txt \"SELECT * FROM self LIMIT 10\"\n```\n\n### Schema-Driven Validation\n\n```bash\n# 1. Create schema from sample data\nfairspec table infer-schema sample.csv --json > schema.json\n\n# 2. Validate the schema itself\nfairspec table validate-schema schema.json\n\n# 3. Generate documentation\nfairspec table render-schema schema.json --to-format markdown --to-path docs.md\n\n# 4. 
Validate production data\nfairspec table validate production.csv --table-schema schema.json\n```\n\n### Data Quality Checks\n\n```bash\n# Check for data quality issues\nfairspec table validate data.csv --table-schema schema.json\n\n# Get detailed statistics\nfairspec table describe data.csv\n\n# Query for specific issues\nfairspec table query data.csv \"SELECT * FROM self WHERE email NOT LIKE '%@%'\"\n\n# Find duplicates\nfairspec table query data.csv \"SELECT id, COUNT(*) as cnt FROM self GROUP BY id HAVING cnt > 1\"\n```\n\n### Interactive Analysis\n\n```bash\n# Start interactive session\nfairspec table script data.csv\n\n# In REPL:\n# - Explore: table.head(10).collect()\n# - Filter: table.filter(pl.col(\"status\").eq(\"active\")).collect()\n# - Aggregate: table.group_by(\"category\").agg(pl.sum(\"amount\")).collect()\n# - Transform: table.with_columns(pl.col(\"price\").mul(1.1).alias(\"new_price\")).collect()\n```\n\n### Format Conversion\n\n```bash\n# Query and output as JSON\nfairspec table query data.csv \"SELECT * FROM self\" --json > output.json\n\n# Get statistics and save\nfairspec table describe large-file.parquet --json > stats.json\n```\n\n## Output Formats\n\n### Text Output (default)\n\nHuman-readable output with formatted tables:\n\n```bash\nfairspec table describe data.csv\n```\n\nOutput:\n```\n# count mean std min max\nid 100 50.5 29.01 1 100\nprice 100 29.99 15.43 9.99 99.99\nquantity 100 75 28.87 1 150\n```\n\n### JSON Output\n\nMachine-readable JSON for automation:\n\n```bash\nfairspec table describe data.csv --json\n```\n\n## Examples\n\n### CSV Data Analysis\n\n```bash\n# Get overview of sales data\nfairspec table describe sales.csv\n\n# Find top customers\nfairspec table query sales.csv \\\n \"SELECT customer, SUM(amount) as total FROM self GROUP BY customer ORDER BY total DESC LIMIT 10\"\n\n# Validate data quality\nfairspec table validate sales.csv --table-schema sales-schema.json\n```\n\n### Multi-Format Pipeline\n\n```bash\n# Load Excel 
data\nfairspec table describe report.xlsx --sheet-name \"Q1 Sales\"\n\n# Query specific sheet\nfairspec table query report.xlsx --sheet-name \"Q1 Sales\" \\\n \"SELECT region, SUM(revenue) FROM self GROUP BY region\"\n\n# Validate against schema\nfairspec table validate report.xlsx --sheet-name \"Q1 Sales\" --table-schema schema.json\n```\n\n### Remote Data Validation\n\n```bash\n# Infer schema from remote data\nfairspec table infer-schema https://api.example.com/export.csv --json > remote-schema.json\n\n# Validate local data against remote schema\nfairspec table validate local-data.csv --table-schema remote-schema.json\n```\n\n### Database Export Validation\n\n```bash\n# Validate SQLite export\nfairspec table validate export.db --table-name users --table-schema expected-schema.json\n\n# Get statistics from database\nfairspec table describe export.db --table-name users\n\n# Query database table\nfairspec table query export.db --table-name users \\\n \"SELECT status, COUNT(*) FROM self GROUP BY status\"\n```","content/docs/terminal/table.md","92652fd19b1ed038",{"html":229,"metadata":230},"\u003Cp>Table operations including querying, validation, statistics, and schema management for tabular data files.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"available-commands\">Available Commands\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#available-commands\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 
0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available Commands”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The \u003Ccode dir=\"auto\">fairspec table\u003C/code> command provides utilities for working with tables:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">describe\u003C/code> - Get table statistics and summary information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">query\u003C/code> - Query tables using SQL syntax\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">validate\u003C/code> - Validate table data against a Table Schema\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer-schema\u003C/code> - Automatically infer Table Schema from table data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">render-schema\u003C/code> - Render Table Schema as HTML or Markdown documentation\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">validate-schema\u003C/code> - Validate a Table Schema file\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer-dialect\u003C/code> - Infer file dialect\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">script\u003C/code> - Interactive REPL session with loaded table\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"describe-tables\">Describe Tables\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#describe-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Describe Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Get statistical summary information about a table:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe a CSV file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe a remote table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Describe from a dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv 
class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe data.csvfairspec table describe https://example.com/data.csvfairspec table describe --from-dataset dataset.json --from-resource salesfairspec table describe data.csv --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"output\">Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns statistics for each column including:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">count\u003C/code> - Number of non-null values\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">null_count\u003C/code> - Number of null values\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">mean\u003C/code> - Average value (numeric columns)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">std\u003C/code> - Standard deviation (numeric columns)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">min\u003C/code> - Minimum value\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">max\u003C/code> - Maximum value\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">median\u003C/code> - Median value (numeric columns)\u003C/li>\n\u003C/ul>\n\u003Cdiv 
class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--from-dataset <path>\u003C/code> - Load table from dataset descriptor\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-resource <name>\u003C/code> - Specify resource name from dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"format-options\">Format Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#format-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 
.32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Format Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>All standard format options are available (see Format Options section below).\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"query-tables\">Query Tables\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#query-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Query Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Execute SQL queries on tables using Polars SQL engine:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Basic query\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self WHERE age > 25\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Aggregate data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT region, SUM(amount) as total FROM self GROUP BY region\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Filter and sort\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT name, email FROM self WHERE active = true ORDER BY name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query from dataset resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self WHERE created_at > 
'2024-01-01'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table query data.csv "SELECT * FROM self WHERE age > 25"fairspec table query sales.csv "SELECT region, SUM(amount) as total FROM self GROUP BY region"fairspec table query users.csv "SELECT name, email FROM self WHERE active = true ORDER BY name"fairspec table query --from-dataset dataset.json --from-resource users \\ "SELECT * FROM self WHERE created_at > '2024-01-01'"\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"sql-syntax\">SQL Syntax\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#sql-syntax\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “SQL Syntax”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>Use \u003Ccode dir=\"auto\">self\u003C/code> as the table name in queries\u003C/li>\n\u003Cli>Supports SELECT, WHERE, GROUP BY, ORDER BY, LIMIT, JOIN, etc.\u003C/li>\n\u003Cli>Full Polars SQL syntax supported\u003C/li>\n\u003Cli>Results are output as formatted tables\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 
id=\"options-1\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--from-dataset <path>\u003C/code> - Load table from dataset descriptor\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-resource <name>\u003C/code> - Specify resource name from dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-tables\">Validate Tables\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#validate-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 
0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate table data against a Table Schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate with explicit schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate with inferred schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate from dataset (uses embedded schema)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output validation report as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table validate data.csv --table-schema schema.jsonfairspec table validate data.csvfairspec table validate --from-dataset dataset.json --from-resource usersfairspec table validate data.csv --table-schema schema.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report\">Validation Report\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#validation-report\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns a validation report with:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">valid\u003C/code> - Boolean indicating if 
validation passed\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">errors\u003C/code> - Array of validation errors (if any)\u003C/li>\n\u003C/ul>\n\u003Cp>Example validation errors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">table/constraint\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"propertyName\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"rowNumber\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">5\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">value 200 exceeds maximum of 150\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">table/type\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"propertyName\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"rowNumber\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">12\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">invalid email format\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv 
class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "table/constraint", "propertyName": "age", "rowNumber": 5, "message": "value 200 exceeds maximum of 150" }, { "type": "table/type", "propertyName": "email", "rowNumber": 12, "message": "invalid email format" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-2\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-2\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--table-schema <path>\u003C/code> - Path to Table Schema file\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-dataset <path>\u003C/code> - Load table from dataset descriptor\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--from-resource <name>\u003C/code> - Specify resource name from dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-table-schema\">Infer Table Schema\u003C/h2>\u003Ca 
class=\"sl-anchor-link\" href=\"#infer-table-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically generate a Table Schema from table data:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer schema from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 
Infer from remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save inferred schema to file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer with custom options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--sample-rows\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1000\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--confidence\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0.95\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-schema data.csvfairspec table infer-schema https://example.com/data.csvfairspec table infer-schema data.csv --json > schema.jsonfairspec table infer-schema data.csv --sample-rows 1000 --confidence 0.95\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-inference-options\">Schema Inference Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-inference-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 
0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Inference Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--sample-rows <number>\u003C/code> - Number of rows to sample for inference (default: 100)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--confidence <number>\u003C/code> - Confidence threshold for type detection (0-1, default: 0.9)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--keep-strings\u003C/code> - Keep original string types instead of inferring\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--column-types <json>\u003C/code> - Override types for specific columns\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--comma-decimal\u003C/code> - Treat comma as decimal separator\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--month-first\u003C/code> - Parse dates as month-first (MM/DD/YYYY)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"generated-schema\">Generated Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#generated-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Generated 
Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The inferred schema automatically detects:\u003C/p>\n\u003Cul>\n\u003Cli>Column types (string, integer, number, boolean, date, datetime, etc.)\u003C/li>\n\u003Cli>Required columns based on presence\u003C/li>\n\u003Cli>Enum values for columns with limited distinct values\u003C/li>\n\u003Cli>Numeric constraints (minimum, maximum)\u003C/li>\n\u003Cli>String patterns\u003C/li>\n\u003Cli>Missing value indicators\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example\">Example\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Given this CSV data:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"csv\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">id,\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">name,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">price,\u003C/span>\u003Cspan style=\"--0:#809191;--1:#616671\">quantity,\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">active,\u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">created_at\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">1,\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">Product A,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">19.99,\u003C/span>\u003Cspan style=\"--0:#809191;--1:#616671\">100,\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">true,\u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">2024-01-15\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">2,\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">Product B,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">29.99,\u003C/span>\u003Cspan style=\"--0:#809191;--1:#616671\">50,\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">false,\u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">2024-01-20\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">3,\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">Product C,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">39.99,\u003C/span>\u003Cspan style=\"--0:#809191;--1:#616671\">75,\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">true,\u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">2024-02-01\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"id,name,price,quantity,active,created_at1,Product A,19.99,100,true,2024-01-152,Product B,29.99,50,false,2024-01-203,Product C,39.99,75,true,2024-02-01\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Infer the schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption 
class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">products.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-schema products.csv --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Generated schema:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"properties\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"price\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">number\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"quantity\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"active\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">boolean\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"created_at\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">date\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"required\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">id\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">price\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">quantity\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">created_at\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "properties": { "id": { "type": "integer" }, "name": { "type": "string" }, "price": { "type": "number" }, "quantity": { "type": "integer" }, "active": { "type": "boolean" }, "created_at": { "type": "date" } }, "required": ["id", "name", "price", "quantity", "active", 
"created_at"]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"render-table-schema\">Render Table Schema\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#render-table-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Render Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Render a Table Schema as human-readable HTML or Markdown documentation:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Render as Markdown\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">markdown\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Render as HTML\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">html\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save to file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">markdown\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.md\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">html\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.html\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table render-schema schema.json --to-format markdownfairspec table render-schema schema.json --to-format htmlfairspec table render-schema schema.json --to-format markdown --to-path schema.mdfairspec table render-schema schema.json 
--to-format html --to-path schema.html\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"output-formats\">Output Formats\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#output-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">markdown\u003C/code> - Generates Markdown documentation with column descriptions, types, and constraints\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">html\u003C/code> - Generates styled HTML table documentation\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-3\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-3\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 
0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--to-format <format>\u003C/code> (required) - Output format (markdown or html)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--to-path <path>\u003C/code> - Save to file instead of stdout\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-table-schema\">Validate Table Schema\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#validate-table-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate that a Table Schema file is valid:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate a schema 
file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate from remote source\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table validate-schema schema.jsonfairspec table validate-schema https://example.com/schema.jsonfairspec table validate-schema schema.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-validation\">Schema Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>This validates that the schema itself is:\u003C/p>\n\u003Cul>\n\u003Cli>Valid JSON\u003C/li>\n\u003Cli>Compliant with Table Schema specification\u003C/li>\n\u003Cli>Has correct property definitions\u003C/li>\n\u003Cli>Uses valid column types and constraints\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report-1\">Validation Report\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#validation-report-1\">\u003Cspan 
aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">true\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: []\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": true, "errors": 
[]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Or if invalid:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">schema/invalid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Invalid column type: 'txt' (did you mean 'text'?)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "schema/invalid", "message": "Invalid column type: 'txt' (did you mean 'text'?)" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-4\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-4\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-file-dialect\">Infer File Dialect\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#infer-file-dialect\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer File Dialect”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically detect the dialect of a table file:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer dialect from file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer from remote file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data.xlsx\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.parquet\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv 
class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-dialect data.csvfairspec table infer-dialect https://example.com/data.xlsxfairspec table infer-dialect data.parquet --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"detected-formats\">Detected Formats\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#detected-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Detected Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The command can detect:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">csv\u003C/code> - Comma-separated values\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">tsv\u003C/code> - Tab-separated values\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">json\u003C/code> - JSON format\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">jsonl\u003C/code> - JSON Lines (newline-delimited JSON)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">xlsx\u003C/code> - Excel spreadsheet\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">ods\u003C/code> - OpenDocument Spreadsheet\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">parquet\u003C/code> - Apache Parquet\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">arrow\u003C/code> - Apache 
Arrow/Feather\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">sqlite\u003C/code> - SQLite database\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-output\">Example Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"delimiter\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">,\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"quoteChar\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "name": "csv", "delimiter": ",", "quoteChar": "\\""}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"interactive-scripting\">Interactive Scripting\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#interactive-scripting\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Interactive Scripting”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Start an interactive REPL session with a loaded table:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load table and start REPL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Script table from dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table script data.csvfairspec table script --from-dataset dataset.json --from-resource users\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"available-in-session\">Available in Session\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#available-in-session\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available in Session”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">fairspec\u003C/code> - Full fairspec library\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">table\u003C/code> - Loaded table (LazyFrame)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-session\">Example Session\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-session\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 
0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Session”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">LazyFrame { \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">DataFrame { \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">select\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">DataFrame { \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">filter\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">pl.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">col\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">gt\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">25\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">DataFrame { \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec> tableLazyFrame { ... }fairspec> table.collect()DataFrame { ... }fairspec> table.select(["name", "age"]).collect()DataFrame { ... }fairspec> table.filter(pl.col("age").gt(25)).collect()DataFrame { ... 
}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"format-options-1\">Format Options\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#format-options-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Format Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>All table commands support these format options for loading data:\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"csvtsv-options\">CSV/TSV Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#csvtsv-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “CSV/TSV Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode 
dir=\"auto\">--format <name>\u003C/code> - Format name (csv, tsv, etc.)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--delimiter <char>\u003C/code> - Column delimiter (default: \u003Ccode dir=\"auto\">,\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--line-terminator <chars>\u003C/code> - Row terminator (default: \u003Ccode dir=\"auto\">\\n\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--quote-char <char>\u003C/code> - Quote character (default: \u003Ccode dir=\"auto\">\"\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--null-sequence <string>\u003C/code> - Null value indicator\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--header-rows <numbers>\u003C/code> - Header row indices (e.g., \u003Ccode dir=\"auto\">[1,2]\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--header-join <char>\u003C/code> - Character to join multi-row headers\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--comment-rows <numbers>\u003C/code> - Comment row indices to skip\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--comment-prefix <char>\u003C/code> - Comment line prefix (e.g., \u003Ccode dir=\"auto\">#\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--column-names <names>\u003C/code> - Override column names (JSON array)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-options\">JSON Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 
0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--json-pointer <pointer>\u003C/code> - JSON pointer to data array (e.g., \u003Ccode dir=\"auto\">/data/users\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--row-type <type>\u003C/code> - Row format: \u003Ccode dir=\"auto\">object\u003C/code> or \u003Ccode dir=\"auto\">array\u003C/code>\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"excelods-options\">Excel/ODS Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#excelods-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Excel/ODS Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--sheet-number <number>\u003C/code> - Sheet index (0-based)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--sheet-name <name>\u003C/code> - Sheet name\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"sqlite-options\">SQLite Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#sqlite-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 
3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “SQLite Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--table-name <name>\u003C/code> - Table name in database\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"table-schema-options\">Table Schema Options\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#table-schema-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Table Schema Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>All table commands support these schema-related options:\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"type-inference\">Type Inference\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#type-inference\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" 
d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Type Inference”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--sample-rows <number>\u003C/code> - Sample size for type inference\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--confidence <number>\u003C/code> - Confidence threshold (0-1)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--keep-strings\u003C/code> - Don’t infer types, keep as strings\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--column-types <json>\u003C/code> - Override types (e.g., \u003Ccode dir=\"auto\">{\"age\":\"integer\"}\u003C/code>)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"value-parsing\">Value Parsing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#value-parsing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Value 
Parsing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--missing-values <values>\u003C/code> - Missing value indicators (JSON array)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--decimal-char <char>\u003C/code> - Decimal separator (default: \u003Ccode dir=\"auto\">.\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--group-char <char>\u003C/code> - Thousands separator (default: \u003Ccode dir=\"auto\">,\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--comma-decimal\u003C/code> - Use comma as decimal (shorthand)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--true-values <values>\u003C/code> - Custom true values (JSON array)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--false-values <values>\u003C/code> - Custom false values (JSON array)\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"datetime-parsing\">Date/Time Parsing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#datetime-parsing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Date/Time Parsing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--datetime-format <format>\u003C/code> - Datetime format string\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--date-format <format>\u003C/code> - Date format string\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--time-format 
<format>\u003C/code> - Time format string\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--month-first\u003C/code> - Parse dates as month-first\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"arraylist-parsing\">Array/List Parsing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#arraylist-parsing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Array/List Parsing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--array-type <type>\u003C/code> - Array item type\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--list-delimiter <char>\u003C/code> - List delimiter (default: \u003Ccode dir=\"auto\">;\u003C/code>)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--list-item-type <type>\u003C/code> - List item type\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"common-workflows\">Common Workflows\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#common-workflows\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 
1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Common Workflows”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"explore-unknown-data\">Explore Unknown Data\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#explore-unknown-data\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Explore Unknown Data”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. 
Infer the dialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-dialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">unknown-data.txt\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. Get basic statistics\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">unknown-data.txt\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. 
Infer the schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">unknown-data.txt\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 4. 
Query the data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">unknown-data.txt\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self LIMIT 10\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-dialect unknown-data.txtfairspec table describe unknown-data.txtfairspec table infer-schema unknown-data.txt --json > schema.jsonfairspec table query unknown-data.txt "SELECT * FROM self LIMIT 10"\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-driven-validation\">Schema-Driven Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-driven-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 
1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema-Driven Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Create schema from sample data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sample.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Validate the schema itself\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. Generate documentation\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">markdown\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">docs.md\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 
4. Validate production data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">production.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-schema sample.csv --json > schema.jsonfairspec table validate-schema schema.jsonfairspec table render-schema schema.json --to-format markdown --to-path docs.mdfairspec table validate production.csv --table-schema schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"data-quality-checks\">Data Quality Checks\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#data-quality-checks\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 
15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Data Quality Checks”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Check for data quality issues\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Get detailed statistics\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query for specific issues\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self WHERE email NOT LIKE '%@%'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Find duplicates\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT id, COUNT(*) as cnt FROM self GROUP BY id HAVING cnt > 1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table validate data.csv --table-schema schema.jsonfairspec table describe data.csvfairspec table query data.csv "SELECT * FROM self WHERE email NOT LIKE '%@%'"fairspec table query data.csv "SELECT id, COUNT(*) as cnt FROM self GROUP BY id HAVING cnt > 1"\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"interactive-analysis\">Interactive Analysis\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#interactive-analysis\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Interactive Analysis”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Start interactive session\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># In REPL:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Explore: table.head(10).collect()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Filter: table.filter(pl.col(\"status\").eq(\"active\")).collect()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Aggregate: table.group_by(\"category\").agg(pl.sum(\"amount\")).collect()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Transform: table.with_columns(pl.col(\"price\").mul(1.1).alias(\"new_price\")).collect()\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table script data.csv\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 
id=\"format-conversion\">Format Conversion\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#format-conversion\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Format Conversion”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query and output as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">output.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Get statistics and save\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">large-file.parquet\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">stats.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table query data.csv "SELECT * FROM self" --json > output.jsonfairspec table describe large-file.parquet --json > stats.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 
id=\"output-formats-1\">Output Formats\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#output-formats-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"text-output-default\">Text Output (default)\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#text-output-default\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Text Output (default)”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Human-readable output with formatted tables:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan 
class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe data.csv\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Output:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"># count mean std min max\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">id 100 50.5 29.01 1 100\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">price 100 29.99 15.43 9.99 99.99\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">quantity 100 75 28.87 1 150\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"# count mean std min maxid 100 50.5 29.01 1 100price 100 29.99 
15.43 9.99 99.99quantity 100 75 28.87 1 150\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-output\">JSON Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Machine-readable JSON for automation:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe data.csv --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"examples\">Examples\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#examples\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Examples”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"csv-data-analysis\">CSV Data Analysis\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#csv-data-analysis\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 
0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “CSV Data Analysis”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Get overview of sales data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Find top customers\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT customer, SUM(amount) as total FROM self GROUP BY customer ORDER BY total DESC LIMIT 10\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate data quality\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">sales-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe sales.csvfairspec table query sales.csv \\ "SELECT customer, SUM(amount) as total FROM self GROUP BY customer ORDER BY total DESC LIMIT 10"fairspec table validate sales.csv --table-schema sales-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"multi-format-pipeline\">Multi-Format Pipeline\u003C/h3>\u003Ca class=\"sl-anchor-link\" 
href=\"#multi-format-pipeline\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Multi-Format Pipeline”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load Excel data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">report.xlsx\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--sheet-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Q1 
Sales\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query specific sheet\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">report.xlsx\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--sheet-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Q1 Sales\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT region, SUM(revenue) FROM self GROUP BY region\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate against schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">report.xlsx\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--sheet-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Q1 Sales\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe report.xlsx --sheet-name "Q1 Sales"fairspec table query report.xlsx --sheet-name "Q1 Sales" \\ "SELECT region, SUM(revenue) FROM self GROUP BY region"fairspec table validate report.xlsx --sheet-name "Q1 Sales" --table-schema schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-data-validation\">Remote Data Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-data-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 
0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote Data Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer schema from remote data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://api.example.com/export.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">remote-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># Validate local data against remote schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">local-data.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">remote-schema.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table infer-schema https://api.example.com/export.csv --json > remote-schema.jsonfairspec table validate local-data.csv --table-schema remote-schema.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"database-export-validation\">Database Export Validation\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#database-export-validation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 
15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Database Export Validation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate SQLite export\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">export.db\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">expected-schema.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Get statistics from database\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">export.db\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query database table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">export.db\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--table-name\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT 
status, COUNT(*) FROM self GROUP BY status\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table validate export.db --table-name users --table-schema expected-schema.jsonfairspec table describe export.db --table-name usersfairspec table query export.db --table-name users \\ "SELECT status, COUNT(*) FROM self GROUP BY status"\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":231,"localImagePaths":347,"remoteImagePaths":348,"frontmatter":349,"imagePaths":351},[232,233,236,237,238,241,244,247,248,251,252,253,256,259,260,261,264,265,266,269,270,271,273,274,275,276,279,282,285,287,290,293,296,299,302,305,308,311,314,315,318,321,324,327,330,332,333,334,335,338,341,344],{"depth":31,"slug":68,"text":69},{"depth":31,"slug":234,"text":235},"describe-tables","Describe Tables",{"depth":74,"slug":81,"text":82},{"depth":74,"slug":75,"text":76},{"depth":74,"slug":239,"text":240},"format-options","Format Options",{"depth":31,"slug":242,"text":243},"query-tables","Query Tables",{"depth":74,"slug":245,"text":246},"sql-syntax","SQL Syntax",{"depth":74,"slug":84,"text":76},{"depth":31,"slug":249,"text":250},"validate-tables","Validate Tables",{"depth":74,"slug":94,"text":95},{"depth":74,"slug":92,"text":76},{"depth":31,"slug":254,"text":255},"infer-table-schema","Infer Table Schema",{"depth":74,"slug":257,"text":258},"schema-inference-options","Schema Inference Options",{"depth":74,"slug":163,"text":164},{"depth":74,"slug":166,"text":167},{"depth":31,"slug":262,"text":263},"render-table-schema","Render Table Schema",{"depth":74,"slug":108,"text":109},{"depth":74,"slug":100,"text":76},{"depth":31,"slug":267,"text":268},"validate-table-schema","Validate Table 
Schema",{"depth":74,"slug":173,"text":174},{"depth":74,"slug":176,"text":95},{"depth":74,"slug":272,"text":76},"options-4",{"depth":31,"slug":97,"text":98},{"depth":74,"slug":180,"text":181},{"depth":74,"slug":86,"text":87},{"depth":31,"slug":277,"text":278},"interactive-scripting","Interactive Scripting",{"depth":74,"slug":280,"text":281},"available-in-session","Available in Session",{"depth":74,"slug":283,"text":284},"example-session","Example Session",{"depth":31,"slug":286,"text":240},"format-options-1",{"depth":74,"slug":288,"text":289},"csvtsv-options","CSV/TSV Options",{"depth":74,"slug":291,"text":292},"json-options","JSON Options",{"depth":74,"slug":294,"text":295},"excelods-options","Excel/ODS Options",{"depth":74,"slug":297,"text":298},"sqlite-options","SQLite Options",{"depth":31,"slug":300,"text":301},"table-schema-options","Table Schema Options",{"depth":74,"slug":303,"text":304},"type-inference","Type Inference",{"depth":74,"slug":306,"text":307},"value-parsing","Value Parsing",{"depth":74,"slug":309,"text":310},"datetime-parsing","Date/Time Parsing",{"depth":74,"slug":312,"text":313},"arraylist-parsing","Array/List Parsing",{"depth":31,"slug":184,"text":185},{"depth":74,"slug":316,"text":317},"explore-unknown-data","Explore Unknown Data",{"depth":74,"slug":319,"text":320},"schema-driven-validation","Schema-Driven Validation",{"depth":74,"slug":322,"text":323},"data-quality-checks","Data Quality Checks",{"depth":74,"slug":325,"text":326},"interactive-analysis","Interactive Analysis",{"depth":74,"slug":328,"text":329},"format-conversion","Format Conversion",{"depth":31,"slug":331,"text":109},"output-formats-1",{"depth":74,"slug":111,"text":112},{"depth":74,"slug":114,"text":115},{"depth":31,"slug":41,"text":42},{"depth":74,"slug":336,"text":337},"csv-data-analysis","CSV Data Analysis",{"depth":74,"slug":339,"text":340},"multi-format-pipeline","Multi-Format Pipeline",{"depth":74,"slug":342,"text":343},"remote-data-validation","Remote Data 
Validation",{"depth":74,"slug":345,"text":346},"database-export-validation","Database Export Validation",[],[],{"title":220,"sidebar":350},{"order":31,"label":223},[],"terminal/dataset",{"id":352,"data":354,"body":360,"filePath":361,"digest":362,"rendered":363},{"title":355,"editUrl":15,"head":356,"template":17,"sidebar":357,"pagefind":15,"draft":21},"Working with Datasets in Terminal",[],{"order":19,"label":358,"hidden":21,"attrs":359},"Dataset",{},"Dataset operations for managing collections of tabular resources with metadata and schemas.\n\n## Available Commands\n\nThe `fairspec dataset` command provides utilities for working with datasets:\n\n- `infer` - Automatically infer a dataset descriptor from data files\n- `copy` - Copy datasets to a local folder\n- `validate` - Validate dataset descriptors and their resources\n- `list` - List resources in a dataset\n- `script` - Interactive REPL session with loaded dataset\n\n## What is a Dataset?\n\nA dataset is a collection of related data resources (tables) with:\n- Metadata describing the dataset (title, description, license, etc.)\n- Resource definitions for each table (path, format, schema)\n- Table Schemas defining the structure of each resource\n- Relationships and foreign keys between resources\n\nDatasets use JSON descriptor files (often named `dataset.json`) following the Fairspec specification.\n\n## Infer Dataset\n\nAutomatically generate a dataset descriptor from data files:\n\n```bash\n# Infer from single file\nfairspec dataset infer data.csv\n\n# Infer from multiple files\nfairspec dataset infer users.csv products.csv orders.csv\n\n# Infer with remote files\nfairspec dataset infer https://example.com/data1.csv data2.csv\n\n# Save to descriptor file\nfairspec dataset infer *.csv --json > dataset.json\n```\n\n### Inference Process\n\nThe infer command automatically:\n1. Detects format for each file (CSV, JSON, Excel, etc.)\n2. Infers Table Schema for each resource\n3. 
Generates resource names from file names\n4. Creates a complete dataset descriptor\n\n### Options\n\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Format Options\n\nFormat detection and schema inference can be customized:\n\n- `--delimiter \u003Cchar>` - CSV delimiter\n- `--header-rows \u003Cnumbers>` - Header row indices (JSON array)\n- `--sample-rows \u003Cnumber>` - Sample size for schema inference\n- `--confidence \u003Cnumber>` - Confidence threshold for type detection\n- `--column-types \u003Cjson>` - Override types for specific columns\n- `--keep-strings` - Keep original string types\n- `--comma-decimal` - Treat comma as decimal separator\n- `--month-first` - Parse dates as month-first\n\n### Generated Descriptor\n\nExample generated dataset descriptor:\n\n```json\n{\n \"resources\": [\n {\n \"name\": \"users\",\n \"data\": \"users.csv\",\n \"format\": {\n \"name\": \"csv\",\n \"delimiter\": \",\"\n },\n \"tableSchema\": {\n \"properties\": {\n \"id\": { \"type\": \"integer\" },\n \"name\": { \"type\": \"string\" },\n \"email\": { \"type\": \"string\" },\n \"created_at\": { \"type\": \"date\" }\n },\n \"required\": [\"id\", \"name\", \"email\"]\n }\n },\n {\n \"name\": \"orders\",\n \"data\": \"orders.csv\",\n \"format\": {\n \"name\": \"csv\"\n },\n \"tableSchema\": {\n \"properties\": {\n \"order_id\": { \"type\": \"integer\" },\n \"user_id\": { \"type\": \"integer\" },\n \"amount\": { \"type\": \"number\" },\n \"status\": { \"type\": \"string\" }\n }\n }\n }\n ]\n}\n```\n\n## Copy Dataset\n\nCopy a dataset and all its resources to a local folder:\n\n```bash\n# Copy dataset to local folder\nfairspec dataset copy dataset.json --to-path ./local-dataset\n\n# Copy remote dataset\nfairspec dataset copy https://example.com/dataset.json --to-path ./dataset\n\n# Silent mode for automation\nfairspec dataset copy dataset.json --to-path ./output --silent\n```\n\n### Copy Behavior\n\nThe copy command:\n- Downloads all remote resources\n- 
Preserves directory structure\n- Updates resource paths in the descriptor to point to local files\n- Creates the target directory if it doesn't exist\n- Saves the updated descriptor to the target location\n\n### Options\n\n- `--to-path \u003Cpath>` (required) - Target directory path\n- `--silent` - Suppress output messages\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n### Example\n\nGiven a dataset with remote resources:\n```json\n{\n \"resources\": [\n {\n \"name\": \"users\",\n \"data\": \"https://example.com/data/users.csv\"\n },\n {\n \"name\": \"products\",\n \"data\": \"https://example.com/data/products.csv\"\n }\n ]\n}\n```\n\nAfter copying:\n```bash\nfairspec dataset copy dataset.json --to-path ./local\n```\n\nResults in:\n```\n./local/\n dataset.json # Updated descriptor\n users.csv # Downloaded resource\n products.csv # Downloaded resource\n```\n\n## Validate Dataset\n\nValidate a dataset descriptor and all its resources:\n\n```bash\n# Validate local dataset\nfairspec dataset validate dataset.json\n\n# Validate remote dataset\nfairspec dataset validate https://example.com/dataset.json\n\n# Output validation report as JSON\nfairspec dataset validate dataset.json --json\n```\n\n### Validation Checks\n\nThe validate command checks:\n- **Descriptor validity** - Valid JSON and conforms to Data Package spec\n- **Resource existence** - All referenced resources can be loaded\n- **Schema validation** - Each resource validates against its Table Schema\n- **Referential integrity** - Foreign key relationships are valid\n- **Format compliance** - Resources match their declared formats\n\n### Validation Report\n\nReturns a validation report with:\n- `valid` - Boolean indicating if validation passed\n- `errors` - Array of validation errors (if any)\n\nExample validation errors:\n```json\n{\n \"valid\": false,\n \"errors\": [\n {\n \"type\": \"dataset/resource-not-found\",\n \"resourceName\": \"users\",\n \"message\": \"Resource file 'users.csv' 
not found\"\n },\n {\n \"type\": \"table/schema\",\n \"resourceName\": \"orders\",\n \"rowNumber\": 15,\n \"propertyName\": \"amount\",\n \"message\": \"value must be a number\"\n },\n {\n \"type\": \"dataset/foreign-key\",\n \"resourceName\": \"orders\",\n \"message\": \"Foreign key 'user_id' references non-existent value in 'users'\"\n }\n ]\n}\n```\n\n### Options\n\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n## List Resources\n\nList all resources in a dataset:\n\n```bash\n# List resources\nfairspec dataset list dataset.json\n\n# List from remote dataset\nfairspec dataset list https://example.com/dataset.json\n\n# Output as JSON array\nfairspec dataset list dataset.json --json\n```\n\n### Output\n\nReturns an array of resource names in the dataset:\n\nText output:\n```\nusers\nproducts\norders\ntransactions\n```\n\nJSON output:\n```json\n[\"users\", \"products\", \"orders\", \"transactions\"]\n```\n\n### Options\n\n- `--debug` - Show debug information\n- `--json` - Output as JSON\n\n## Interactive Scripting\n\nStart an interactive REPL session with a loaded dataset:\n\n```bash\n# Load dataset and start REPL\nfairspec dataset script dataset.json\n\n# Script remote dataset\nfairspec dataset script https://example.com/dataset.json\n```\n\n### Available in Session\n\n- `fairspec` - Full fairspec library\n- `dataset` - Loaded dataset descriptor\n\n### Example Session\n\n```python\nfairspec> dataset\n{\n \"resources\": [\n {\"name\": \"users\", \"data\": \"users.csv\", ...},\n {\"name\": \"orders\", \"data\": \"orders.csv\", ...}\n ]\n}\n\nfairspec> len(dataset.resources)\n2\n\nfairspec> dataset.resources[0].name\n'users'\n\nfairspec> table = fairspec.load_table(dataset.resources[0])\nfairspec> table.head(5).collect()\nDataFrame { ... }\n```\n\n## Common Workflows\n\n### Create Dataset from Files\n\n```bash\n# 1. Infer dataset from multiple files\nfairspec dataset infer data/*.csv --json > dataset.json\n\n# 2. 
Manually edit dataset.json to add:\n# - Title and description\n# - License information\n# - Foreign key relationships\n# - Additional metadata\n\n# 3. Validate the dataset\nfairspec dataset validate dataset.json\n\n# 4. List resources to confirm\nfairspec dataset list dataset.json\n```\n\n### Clone Remote Dataset\n\n```bash\n# 1. Copy remote dataset locally\nfairspec dataset copy https://example.com/dataset.json --to-path ./local-data\n\n# 2. Validate local copy\nfairspec dataset validate ./local-data/dataset.json\n\n# 3. List resources\nfairspec dataset list ./local-data/dataset.json\n```\n\n### Dataset Quality Assurance\n\n```bash\n# 1. Validate the dataset\nfairspec dataset validate dataset.json\n\n# 2. If validation fails, check individual resources\nfairspec table validate --from-dataset dataset.json --from-resource users\n\n# 3. Inspect resource schemas\nfairspec table infer-schema --from-dataset dataset.json --from-resource users\n\n# 4. Generate schema documentation\nfairspec table render-schema schema.json --to-format markdown --to-path docs/users-schema.md\n```\n\n### Dataset Evolution\n\n```bash\n# 1. Start with existing dataset\nfairspec dataset validate old-dataset.json\n\n# 2. Add new data files\nfairspec dataset infer old-data/*.csv new-data/*.csv --json > dataset.json\n\n# 3. Merge metadata from old descriptor\n# (manual step - copy title, license, etc.)\n\n# 4. Validate updated dataset\nfairspec dataset validate dataset.json\n\n# 5. 
Verify all resources\nfairspec dataset list dataset.json\n```\n\n### Automation and CI/CD\n\n```bash\n#!/bin/bash\n\n# Validate dataset in CI pipeline\nif fairspec dataset validate dataset.json --json | jq -e '.valid'; then\n echo \"✓ Dataset validation passed\"\n exit 0\nelse\n echo \"✗ Dataset validation failed\"\n fairspec dataset validate dataset.json\n exit 1\nfi\n```\n\n## Output Formats\n\n### Text Output (default)\n\nHuman-readable output with colors and formatting:\n\n```bash\nfairspec dataset list dataset.json\n```\n\nOutput:\n```\nusers\nproducts\norders\n```\n\n### JSON Output\n\nMachine-readable JSON for automation and scripting:\n\n```bash\nfairspec dataset validate dataset.json --json\n```\n\n### Silent Mode\n\nSuppress all output except errors (for copy command):\n\n```bash\nfairspec dataset copy dataset.json --to-path ./output --silent\n```\n\nUse exit code to check success:\n```bash\nif fairspec dataset copy dataset.json --to-path ./output --silent; then\n echo \"Success\"\nelse\n echo \"Failed\"\nfi\n```\n\n## Examples\n\n### Create Multi-Table Dataset\n\n```bash\n# Prepare your data files\n# - customers.csv\n# - orders.csv\n# - products.csv\n\n# Infer the dataset\nfairspec dataset infer customers.csv orders.csv products.csv --json > dataset.json\n\n# Enhance the descriptor\ncat > dataset.json \u003C\u003C 'EOF'\n{\n \"name\": \"sales-data\",\n \"title\": \"Sales Database Export\",\n \"description\": \"Customer orders and product catalog\",\n \"license\": \"CC-BY-4.0\",\n \"resources\": [\n {\n \"name\": \"customers\",\n \"data\": \"customers.csv\",\n \"tableSchema\": { \"properties\": { ... } }\n },\n {\n \"name\": \"orders\",\n \"data\": \"orders.csv\",\n \"tableSchema\": {\n \"properties\": { ... 
},\n \"foreignKeys\": [\n {\n \"columns\": [\"customer_id\"],\n \"reference\": {\n \"resource\": \"customers\",\n \"columns\": [\"id\"]\n }\n }\n ]\n }\n }\n ]\n}\nEOF\n\n# Validate\nfairspec dataset validate dataset.json\n```\n\n### Download and Validate Public Dataset\n\n```bash\n# Copy public dataset\nfairspec dataset copy https://data.example.org/climate/dataset.json \\\n --to-path ./climate-data\n\n# Validate local copy\nfairspec dataset validate ./climate-data/dataset.json\n\n# List available resources\nfairspec dataset list ./climate-data/dataset.json\n\n# Explore specific resource\nfairspec table describe --from-dataset ./climate-data/dataset.json \\\n --from-resource temperature\n```\n\n### Dataset Testing\n\n```bash\n# test-dataset.sh\n\necho \"Testing dataset integrity...\"\n\n# 1. Validate descriptor\nif ! fairspec dataset validate dataset.json --silent; then\n echo \"✗ Dataset validation failed\"\n fairspec dataset validate dataset.json\n exit 1\nfi\n\n# 2. Check all resources exist\nfor resource in $(fairspec dataset list dataset.json --json | jq -r '.[]'); do\n echo \"Checking resource: $resource\"\n if ! 
fairspec table describe --from-dataset dataset.json --from-resource \"$resource\" --silent; then\n echo \"✗ Resource $resource could not be loaded\"\n exit 1\n fi\ndone\n\necho \"✓ All tests passed\"\n```\n\n### Interactive Data Exploration\n\n```bash\n# Start interactive session\nfairspec dataset script dataset.json\n\n# In REPL, explore the dataset:\n```\n\n```python\n# List all resources\n[r.name for r in dataset.resources]\n\n# Load a specific resource\nusers = fairspec.load_table(next(r for r in dataset.resources if r.name == \"users\"))\n\n# Query the data\nactive_users = users.filter(pl.col(\"active\").eq(True)).collect()\nprint(active_users)\n\n# Check schema\nprint(dataset.resources[0].tableSchema)\n```\n\n## Working with Resources\n\nAll dataset commands integrate with table commands through the `--from-dataset` and `--from-resource` options:\n\n```bash\n# Load resource from dataset\nfairspec table describe --from-dataset dataset.json --from-resource users\n\n# Query resource\nfairspec table query --from-dataset dataset.json --from-resource orders \\\n \"SELECT * FROM self WHERE status = 'shipped'\"\n\n# Validate resource\nfairspec table validate --from-dataset dataset.json --from-resource products\n\n# Infer resource schema\nfairspec table infer-schema --from-dataset dataset.json --from-resource users\n```\n\nThis approach allows you to:\n- Work with resources without specifying paths or formats\n- Use embedded Table Schemas automatically\n- Maintain consistency across your dataset\n- Simplify command-line usage","content/docs/terminal/dataset.md","0518aa8ddd84c252",{"html":364,"metadata":365},"\u003Cp>Dataset operations for managing collections of tabular resources with metadata and schemas.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"available-commands\">Available Commands\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#available-commands\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" 
height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available Commands”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The \u003Ccode dir=\"auto\">fairspec dataset\u003C/code> command provides utilities for working with datasets:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">infer\u003C/code> - Automatically infer a dataset descriptor from data files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">copy\u003C/code> - Copy datasets to a local folder\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">validate\u003C/code> - Validate dataset descriptors and their resources\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">list\u003C/code> - List resources in a dataset\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">script\u003C/code> - Interactive REPL session with loaded dataset\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"what-is-a-dataset\">What is a Dataset?\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#what-is-a-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 
1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “What is a Dataset?”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>A dataset is a collection of related data resources (tables) with:\u003C/p>\n\u003Cul>\n\u003Cli>Metadata describing the dataset (title, description, license, etc.)\u003C/li>\n\u003Cli>Resource definitions for each table (path, format, schema)\u003C/li>\n\u003Cli>Table Schemas defining the structure of each resource\u003C/li>\n\u003Cli>Relationships and foreign keys between resources\u003C/li>\n\u003C/ul>\n\u003Cp>Datasets use JSON descriptor files (often named \u003Ccode dir=\"auto\">dataset.json\u003C/code>) following the Fairspec specification.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"infer-dataset\">Infer Dataset\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#infer-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Infer Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically generate a dataset descriptor from data files:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" 
src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer from single file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer from multiple files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">products.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">orders.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer with remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/data1.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data2.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save to descriptor file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">*\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset infer data.csvfairspec dataset infer users.csv products.csv orders.csvfairspec dataset infer https://example.com/data1.csv data2.csvfairspec dataset infer *.csv --json > dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"inference-process\">Inference Process\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#inference-process\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Inference Process”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The infer command automatically:\u003C/p>\n\u003Col>\n\u003Cli>Detects format for each file (CSV, JSON, Excel, etc.)\u003C/li>\n\u003Cli>Infers Table Schema for each resource\u003C/li>\n\u003Cli>Generates resource names from file names\u003C/li>\n\u003Cli>Creates a complete dataset descriptor\u003C/li>\n\u003C/ol>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" 
viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"format-options\">Format Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#format-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Format Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Format detection and schema inference can be customized:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--delimiter <char>\u003C/code> - CSV delimiter\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--header-rows <numbers>\u003C/code> - Header row indices 
(JSON array)\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--sample-rows <number>\u003C/code> - Sample size for schema inference\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--confidence <number>\u003C/code> - Confidence threshold for type detection\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--column-types <json>\u003C/code> - Override types for specific columns\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--keep-strings\u003C/code> - Keep original string types\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--comma-decimal\u003C/code> - Treat comma as decimal separator\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--month-first\u003C/code> - Parse dates as month-first\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"generated-descriptor\">Generated Descriptor\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#generated-descriptor\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Generated Descriptor”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Example generated dataset descriptor:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"resources\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"data\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">users.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"format\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#C789D6;--1:#7F5889\">csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"delimiter\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">,\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"tableSchema\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"properties\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"email\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"created_at\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">date\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"required\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"data\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">orders.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"format\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"tableSchema\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#7FDBCA;--1:#097174\">\"properties\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"order_id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"user_id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"amount\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">number\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> },\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"status\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: { \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "resources": [ { "name": "users", "data": "users.csv", "format": { "name": "csv", "delimiter": "," }, "tableSchema": { "properties": { "id": { "type": "integer" }, "name": { 
"type": "string" }, "email": { "type": "string" }, "created_at": { "type": "date" } }, "required": ["id", "name", "email"] } }, { "name": "orders", "data": "orders.csv", "format": { "name": "csv" }, "tableSchema": { "properties": { "order_id": { "type": "integer" }, "user_id": { "type": "integer" }, "amount": { "type": "number" }, "status": { "type": "string" } } } } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"copy-dataset\">Copy Dataset\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#copy-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Copy Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Copy a dataset and all its resources to a local folder:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy dataset to local folder\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./local-dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy remote dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Silent mode for automation\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./output\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset copy dataset.json --to-path ./local-datasetfairspec dataset copy https://example.com/dataset.json --to-path ./datasetfairspec dataset copy dataset.json --to-path ./output --silent\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"copy-behavior\">Copy Behavior\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#copy-behavior\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 
4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Copy Behavior”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The copy command:\u003C/p>\n\u003Cul>\n\u003Cli>Downloads all remote resources\u003C/li>\n\u003Cli>Preserves directory structure\u003C/li>\n\u003Cli>Updates resource paths in the descriptor to point to local files\u003C/li>\n\u003Cli>Creates the target directory if it doesn’t exist\u003C/li>\n\u003Cli>Saves the updated descriptor to the target location\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-1\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-1\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--to-path <path>\u003C/code> (required) - Target directory path\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--silent\u003C/code> - Suppress output messages\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example\">Example\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example\">\u003Cspan aria-hidden=\"true\" 
class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Given a dataset with remote resources:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"resources\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"data\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">https://example.com/data/users.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">products\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"data\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">https://example.com/data/products.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "resources": [ { "name": "users", "data": "https://example.com/data/users.csv" }, { "name": "products", "data": "https://example.com/data/products.csv" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>After copying:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">./local\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset copy dataset.json --to-path ./local\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Results in:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">./local/\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">dataset.json # Updated descriptor\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">users.csv # Downloaded resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">products.csv # Downloaded resource\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"./local/ dataset.json # Updated descriptor users.csv # Downloaded resource products.csv # Downloaded resource\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"validate-dataset\">Validate Dataset\u003C/h2>\u003Ca class=\"sl-anchor-link\" 
href=\"#validate-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validate Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Validate a dataset descriptor and all its resources:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate local dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate remote 
dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output validation report as JSON\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset validate dataset.jsonfairspec dataset validate https://example.com/dataset.jsonfairspec dataset validate dataset.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-checks\">Validation Checks\u003C/h3>\u003Ca class=\"sl-anchor-link\" 
href=\"#validation-checks\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Checks”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The validate command checks:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Cstrong>Descriptor validity\u003C/strong> - Valid JSON and conforms to Data Package spec\u003C/li>\n\u003Cli>\u003Cstrong>Resource existence\u003C/strong> - All referenced resources can be loaded\u003C/li>\n\u003Cli>\u003Cstrong>Schema validation\u003C/strong> - Each resource validates against its Table Schema\u003C/li>\n\u003Cli>\u003Cstrong>Referential integrity\u003C/strong> - Foreign key relationships are valid\u003C/li>\n\u003Cli>\u003Cstrong>Format compliance\u003C/strong> - Resources match their declared formats\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"validation-report\">Validation Report\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#validation-report\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 
3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Validation Report”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns a validation report with:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">valid\u003C/code> - Boolean indicating if validation passed\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">errors\u003C/code> - Array of validation errors (if any)\u003C/li>\n\u003C/ul>\n\u003Cp>Example validation errors:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"valid\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">false\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"errors\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: 
\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">dataset/resource-not-found\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"resourceName\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Resource file 'users.csv' not found\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#C789D6;--1:#7F5889\">table/schema\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"resourceName\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"rowNumber\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">15\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"propertyName\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">amount\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">value must be a number\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"type\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">dataset/foreign-key\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"resourceName\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"message\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Foreign key 'user_id' references non-existent value in 'users'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{ "valid": false, "errors": [ { "type": "dataset/resource-not-found", "resourceName": "users", "message": "Resource file 'users.csv' not found" }, { "type": "table/schema", "resourceName": "orders", "rowNumber": 15, "propertyName": "amount", "message": "value must be a number" }, { "type": "dataset/foreign-key", "resourceName": "orders", "message": "Foreign key 'user_id' references non-existent value in 'users'" } ]}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-2\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-2\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"list-resources\">List Resources\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#list-resources\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “List Resources”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>List all resources in a dataset:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># List resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># List from remote dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Output as JSON array\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec 
dataset list dataset.jsonfairspec dataset list https://example.com/dataset.jsonfairspec dataset list dataset.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"output\">Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Returns an array of resource names in the dataset:\u003C/p>\n\u003Cp>Text output:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">products\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">orders\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">transactions\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv 
aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"usersproductsorderstransactions\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>JSON output:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">products\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">transactions\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"["users", "products", "orders", "transactions"]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"options-3\">Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#options-3\">\u003Cspan aria-hidden=\"true\" 
class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">--debug\u003C/code> - Show debug information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">--json\u003C/code> - Output as JSON\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"interactive-scripting\">Interactive Scripting\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#interactive-scripting\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Interactive Scripting”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Start an interactive REPL session with a loaded dataset:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption 
class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load dataset and start REPL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Script remote dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset script dataset.jsonfairspec dataset script https://example.com/dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 
id=\"available-in-session\">Available in Session\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#available-in-session\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Available in Session”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">fairspec\u003C/code> - Full fairspec library\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">dataset\u003C/code> - Loaded dataset descriptor\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"example-session\">Example Session\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#example-session\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Example Session”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">resources\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">users.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">orders.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">len\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">dataset.resources\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> dataset.resources[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">].name\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">dataset.resources\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">])\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">fairspec\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> table.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">head\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">5\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">DataFrame { \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">...\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> }\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec> dataset{ "resources": [ {"name": "users", "data": "users.csv", ...}, {"name": "orders", "data": "orders.csv", ...} ]}fairspec> len(dataset.resources)2fairspec> dataset.resources[0].name'users'fairspec> table = fairspec.load_table(dataset.resources[0])fairspec> table.head(5).collect()DataFrame { ... 
}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"common-workflows\">Common Workflows\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#common-workflows\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Common Workflows”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"create-dataset-from-files\">Create Dataset from Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#create-dataset-from-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Create Dataset from Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame 
is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Infer dataset from multiple files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">data/\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">*\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Manually edit dataset.json to add:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Title and description\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - License information\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Foreign key relationships\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - Additional metadata\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. Validate the dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 4. 
List resources to confirm\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset infer data/*.csv --json > dataset.jsonfairspec dataset validate dataset.jsonfairspec dataset list dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"clone-remote-dataset\">Clone Remote Dataset\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#clone-remote-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Clone Remote Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan 
class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Copy remote dataset locally\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://example.com/dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./local-data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Validate local copy\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./local-data/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. List resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./local-data/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset copy https://example.com/dataset.json --to-path ./local-datafairspec dataset validate ./local-data/dataset.jsonfairspec dataset list ./local-data/dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"dataset-quality-assurance\">Dataset Quality Assurance\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#dataset-quality-assurance\">\u003Cspan aria-hidden=\"true\" 
class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Dataset Quality Assurance”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Validate the dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
If validation fails, check individual resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. 
Inspect resource schemas\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 4. 
Generate schema documentation\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">render-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">schema.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-format\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">markdown\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">docs/users-schema.md\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset validate dataset.jsonfairspec table validate --from-dataset dataset.json --from-resource usersfairspec table infer-schema --from-dataset dataset.json --from-resource usersfairspec table render-schema schema.json --to-format markdown --to-path docs/users-schema.md\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"dataset-evolution\">Dataset Evolution\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#dataset-evolution\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 
0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Dataset Evolution”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Start with existing dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">old-dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Add new data files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">old-data/\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">*\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">new-data/\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">*\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 3. Merge metadata from old descriptor\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># (manual step - copy title, license, etc.)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 4. 
Validate updated dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 5. Verify all resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset validate old-dataset.jsonfairspec dataset infer old-data/*.csv new-data/*.csv --json > dataset.jsonfairspec dataset validate dataset.jsonfairspec dataset list dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"automation-and-cicd\">Automation and CI/CD\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#automation-and-cicd\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" 
height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Automation and CI/CD”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\">#!/bin/bash\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate dataset in CI pipeline\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">|\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">jq\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">-e\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">.valid\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✓ Dataset validation passed\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">exit\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">else\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✗ Dataset validation failed\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">exit\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"#!/bin/bash# Validate dataset in CI pipelineif fairspec dataset validate dataset.json --json | jq -e '.valid'; then echo "✓ Dataset validation passed" exit 0else echo "✗ Dataset validation failed" fairspec dataset validate dataset.json exit 1fi\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"output-formats\">Output Formats\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#output-formats\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 
1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Output Formats”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"text-output-default\">Text Output (default)\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#text-output-default\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Text Output (default)”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Human-readable output with colors and formatting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset list dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Output:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"plaintext\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">products\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#d6deeb;--1:#403f53\">orders\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"usersproductsorders\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-output\">JSON Output\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-output\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 
0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON Output”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Machine-readable JSON for automation and scripting:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset validate dataset.json --json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"silent-mode\">Silent Mode\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#silent-mode\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 
0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Silent Mode”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Suppress all output except errors (for copy command):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./output\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset copy dataset.json --to-path ./output --silent\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>Use exit code to check 
success:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./output\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Success\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">else\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Failed\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"if fairspec dataset copy dataset.json --to-path ./output --silent; then echo "Success"else echo "Failed"fi\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"examples\">Examples\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#examples\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Examples”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"create-multi-table-dataset\">Create Multi-Table Dataset\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#create-multi-table-dataset\">\u003Cspan aria-hidden=\"true\" 
class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Create Multi-Table Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Prepare your data files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - customers.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - orders.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># - products.csv\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer the dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">customers.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">orders.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">products.csv\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Enhance the descriptor\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">cat\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\"><<\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'EOF'\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"name\": \"sales-data\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"title\": \"Sales Database Export\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"description\": \"Customer orders and product catalog\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"license\": \"CC-BY-4.0\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"resources\": [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"name\": \"customers\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"data\": 
\"customers.csv\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"tableSchema\": { \"properties\": { ... } }\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"name\": \"orders\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"data\": \"orders.csv\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"tableSchema\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"properties\": { ... 
},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"foreignKeys\": [\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"columns\": [\"customer_id\"],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"reference\": {\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"resource\": \"customers\",\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"columns\": [\"id\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">EOF\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset infer customers.csv orders.csv products.csv --json > dataset.jsoncat > dataset.json \u003C\u003C 'EOF'{ "name": "sales-data", "title": "Sales Database Export", "description": "Customer orders and product catalog", "license": "CC-BY-4.0", "resources": [ { "name": "customers", "data": "customers.csv", "tableSchema": { "properties": { ... } } }, { "name": "orders", "data": "orders.csv", "tableSchema": { "properties": { ... }, "foreignKeys": [ { "columns": ["customer_id"], "reference": { "resource": "customers", "columns": ["id"] } } ] } } ]}EOFfairspec dataset validate dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"download-and-validate-public-dataset\">Download and Validate Public Dataset\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#download-and-validate-public-dataset\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Download and Validate Public Dataset”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal 
not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Copy public dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">copy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">https://data.example.org/climate/dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--to-path\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./climate-data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate local copy\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">./climate-data/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># List available resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./climate-data/dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Explore specific resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">./climate-data/dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">temperature\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset copy https://data.example.org/climate/dataset.json \\ --to-path ./climate-datafairspec dataset validate ./climate-data/dataset.jsonfairspec dataset list ./climate-data/dataset.jsonfairspec table describe --from-dataset ./climate-data/dataset.json \\ --from-resource temperature\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"dataset-testing\">Dataset Testing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#dataset-testing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Dataset Testing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame has-title not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">test-dataset.sh\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Testing dataset integrity...\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 1. Validate descriptor\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">!\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✗ Dataset validation failed\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">exit\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># 2. 
Check all resources exist\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">for\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">in\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> $(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">list\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">|\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">jq\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">-r\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">.[]\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">); \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">do\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Checking resource: \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$resource\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">!\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$resource\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--silent\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">; \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">then\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✗ Resource \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">$resource\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\"> could not be loaded\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">exit\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">fi\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">done\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">echo\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">✓ All tests passed\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"echo "Testing dataset integrity..."# 1. Validate descriptorif ! fairspec dataset validate dataset.json --silent; then echo "✗ Dataset validation failed" fairspec dataset validate dataset.json exit 1fi# 2. Check all resources existfor resource in $(fairspec dataset list dataset.json --json | jq -r '.[]'); do echo "Checking resource: $resource" if ! 
fairspec table describe --from-dataset dataset.json --from-resource "$resource" --silent; then echo "✗ Resource $resource could not be loaded" exit 1 fidoneecho "✓ All tests passed"\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"interactive-data-exploration\">Interactive Data Exploration\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#interactive-data-exploration\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Interactive Data Exploration”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Start interactive session\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">script\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># In REPL, explore the dataset:\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec dataset script dataset.json\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># List all resources\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">r.name \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">for\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> r \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">in\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> dataset.resources\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a specific resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">users \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> fairspec.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">next\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">r \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">for\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> r \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">in\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> dataset.resources \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">if\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> r.name \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">==\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query the data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">active_users \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> users.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">filter\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pl.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">col\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">eq\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">True\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">active_users\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Check schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C5E478;--1:#3C63B3\">print\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">dataset.resources\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">.tableSchema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"# List all resources[r.name for r in dataset.resources]# Load a specific resourceusers = 
fairspec.load_table(next(r for r in dataset.resources if r.name == "users"))# Query the dataactive_users = users.filter(pl.col("active").eq(True)).collect()print(active_users)# Check schemaprint(dataset.resources[0].tableSchema)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"working-with-resources\">Working with Resources\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#working-with-resources\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Working with Resources”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>All dataset commands integrate with table commands through the \u003Ccode dir=\"auto\">--from-dataset\u003C/code> and \u003Ccode dir=\"auto\">--from-resource\u003C/code> options:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load resource from dataset\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">describe\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Query resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">query\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">orders\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#F78C6C;--1:#AA0982\">\\\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">SELECT * FROM self WHERE status = 'shipped'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Validate resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">validate\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">products\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Infer resource schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">fairspec\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">infer-schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-dataset\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">dataset.json\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">--from-resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">users\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"fairspec table describe --from-dataset dataset.json --from-resource usersfairspec table query --from-dataset dataset.json --from-resource orders \\ "SELECT * FROM self WHERE status = 'shipped'"fairspec table validate --from-dataset dataset.json --from-resource productsfairspec table infer-schema --from-dataset dataset.json --from-resource users\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cp>This approach allows you to:\u003C/p>\n\u003Cul>\n\u003Cli>Work with resources without specifying paths or formats\u003C/li>\n\u003Cli>Use embedded Table Schemas automatically\u003C/li>\n\u003Cli>Maintain consistency across your dataset\u003C/li>\n\u003Cli>Simplify command-line usage\u003C/li>\n\u003C/ul>",{"headings":366,"localImagePaths":442,"remoteImagePaths":443,"frontmatter":444,"imagePaths":446},[367,368,371,374,377,378,379,382,385,388,389,390,393,396,397,398,401,402,403,404,405,406,407,410,413,416,419,422,423,424,425,426,427,430,433,436,439],{"depth":31,"slug":68,"text":69},{"depth":31,"slug":369,"text":370},"what-is-a-dataset","What is a 
Dataset?",{"depth":31,"slug":372,"text":373},"infer-dataset","Infer Dataset",{"depth":74,"slug":375,"text":376},"inference-process","Inference Process",{"depth":74,"slug":75,"text":76},{"depth":74,"slug":239,"text":240},{"depth":74,"slug":380,"text":381},"generated-descriptor","Generated Descriptor",{"depth":31,"slug":383,"text":384},"copy-dataset","Copy Dataset",{"depth":74,"slug":386,"text":387},"copy-behavior","Copy Behavior",{"depth":74,"slug":84,"text":76},{"depth":74,"slug":166,"text":167},{"depth":31,"slug":391,"text":392},"validate-dataset","Validate Dataset",{"depth":74,"slug":394,"text":395},"validation-checks","Validation Checks",{"depth":74,"slug":94,"text":95},{"depth":74,"slug":92,"text":76},{"depth":31,"slug":399,"text":400},"list-resources","List Resources",{"depth":74,"slug":81,"text":82},{"depth":74,"slug":100,"text":76},{"depth":31,"slug":277,"text":278},{"depth":74,"slug":280,"text":281},{"depth":74,"slug":283,"text":284},{"depth":31,"slug":184,"text":185},{"depth":74,"slug":408,"text":409},"create-dataset-from-files","Create Dataset from Files",{"depth":74,"slug":411,"text":412},"clone-remote-dataset","Clone Remote Dataset",{"depth":74,"slug":414,"text":415},"dataset-quality-assurance","Dataset Quality Assurance",{"depth":74,"slug":417,"text":418},"dataset-evolution","Dataset Evolution",{"depth":74,"slug":420,"text":421},"automation-and-cicd","Automation and CI/CD",{"depth":31,"slug":108,"text":109},{"depth":74,"slug":111,"text":112},{"depth":74,"slug":114,"text":115},{"depth":74,"slug":117,"text":118},{"depth":31,"slug":41,"text":42},{"depth":74,"slug":428,"text":429},"create-multi-table-dataset","Create Multi-Table Dataset",{"depth":74,"slug":431,"text":432},"download-and-validate-public-dataset","Download and Validate Public Dataset",{"depth":74,"slug":434,"text":435},"dataset-testing","Dataset Testing",{"depth":74,"slug":437,"text":438},"interactive-data-exploration","Interactive Data 
Exploration",{"depth":31,"slug":440,"text":441},"working-with-resources","Working with Resources",[],[],{"title":355,"sidebar":445},{"order":19,"label":358},[],"overview/contributing",{"id":447,"data":449,"body":454,"filePath":455,"digest":456,"rendered":457},{"title":450,"editUrl":15,"head":451,"template":17,"sidebar":452,"pagefind":15,"draft":21},"Contributing",[],{"order":31,"hidden":21,"attrs":453},{},"Thank you for your interest in contributing to Fairspec Python! This document provides guidelines and instructions for contributing to this project.\n\n> [!NOTE]\n> This document is a work in progress.","content/docs/overview/contributing.md","a16fabf43754a0d2",{"html":458,"metadata":459},"\u003Cp>Thank you for your interest in contributing to Fairspec Python! This document provides guidelines and instructions for contributing to this project.\u003C/p>\n\u003Caside aria-label=\"Note\" class=\"starlight-aside starlight-aside--note\">\u003Cp class=\"starlight-aside__title\" aria-hidden=\"true\">\u003Csvg viewBox=\"0 0 24 24\" width=\"16\" height=\"16\" fill=\"currentColor\" class=\"starlight-aside__icon\">\u003Cpath d=\"M12 11C11.7348 11 11.4804 11.1054 11.2929 11.2929C11.1054 11.4804 11 11.7348 11 12V16C11 16.2652 11.1054 16.5196 11.2929 16.7071C11.4804 16.8946 11.7348 17 12 17C12.2652 17 12.5196 16.8946 12.7071 16.7071C12.8946 16.5196 13 16.2652 13 16V12C13 11.7348 12.8946 11.4804 12.7071 11.2929C12.5196 11.1054 12.2652 11 12 11ZM12.38 7.08C12.1365 6.97998 11.8635 6.97998 11.62 7.08C11.4973 7.12759 11.3851 7.19896 11.29 7.29C11.2017 7.3872 11.1306 7.49882 11.08 7.62C11.024 7.73868 10.9966 7.86882 11 8C10.9992 8.13161 11.0245 8.26207 11.0742 8.38391C11.124 8.50574 11.1973 8.61656 11.29 8.71C11.3872 8.79833 11.4988 8.86936 11.62 8.92C11.7715 8.98224 11.936 9.00632 12.099 8.99011C12.2619 8.97391 12.4184 8.91792 12.5547 8.82707C12.691 8.73622 12.8029 8.61328 12.8805 8.46907C12.9582 8.32486 12.9992 8.16378 13 8C12.9963 7.73523 12.8927 7.48163 12.71 7.29C12.6149 
7.19896 12.5028 7.12759 12.38 7.08ZM12 2C10.0222 2 8.08879 2.58649 6.4443 3.6853C4.79981 4.78412 3.51809 6.3459 2.76121 8.17317C2.00433 10.0004 1.8063 12.0111 2.19215 13.9509C2.578 15.8907 3.53041 17.6725 4.92894 19.0711C6.32746 20.4696 8.10929 21.422 10.0491 21.8079C11.9889 22.1937 13.9996 21.9957 15.8268 21.2388C17.6541 20.4819 19.2159 19.2002 20.3147 17.5557C21.4135 15.9112 22 13.9778 22 12C22 10.6868 21.7413 9.38642 21.2388 8.17317C20.7363 6.95991 19.9997 5.85752 19.0711 4.92893C18.1425 4.00035 17.0401 3.26375 15.8268 2.7612C14.6136 2.25866 13.3132 2 12 2ZM12 20C10.4178 20 8.87104 19.5308 7.55544 18.6518C6.23985 17.7727 5.21447 16.5233 4.60897 15.0615C4.00347 13.5997 3.84504 11.9911 4.15372 10.4393C4.4624 8.88743 5.22433 7.46197 6.34315 6.34315C7.46197 5.22433 8.88743 4.4624 10.4393 4.15372C11.9911 3.84504 13.5997 4.00346 15.0615 4.60896C16.5233 5.21447 17.7727 6.23984 18.6518 7.55544C19.5308 8.87103 20 10.4177 20 12C20 14.1217 19.1572 16.1566 17.6569 17.6569C16.1566 19.1571 14.1217 20 12 20Z\">\u003C/path>\u003C/svg>Note\u003C/p>\u003Cdiv class=\"starlight-aside__content\">\u003Cp>This document is a work in progress.\u003C/p>\u003C/div>\u003C/aside>",{"headings":460,"localImagePaths":461,"remoteImagePaths":462,"frontmatter":463,"imagePaths":465},[],[],[],{"title":450,"sidebar":464},{"order":31},[],"python/arrow",{"id":466,"data":468,"body":475,"filePath":476,"digest":477,"rendered":478},{"title":469,"editUrl":15,"head":470,"template":17,"sidebar":471,"pagefind":15,"draft":21},"Working with Arrow in Python",[],{"order":472,"label":473,"hidden":21,"attrs":474},7,"Arrow",{},"Apache Arrow IPC file handling with high-performance columnar data processing.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe Arrow plugin provides:\n\n- `load_arrow_table` - Load Arrow IPC files into tables\n- `save_arrow_table` - Save tables to Arrow IPC files\n- `ArrowPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom 
fairspec import load_arrow_table, Resource\n\ntable = load_arrow_table(Resource(data=\"table.arrow\"))\n# High-performance columnar format\n```\n\n## Basic Usage\n\n### Loading Arrow Files\n\n```python\nfrom fairspec import load_arrow_table, Resource\n\n# Load from local file\ntable = load_arrow_table(Resource(data=\"data.arrow\"))\n\n# Load from remote URL\ntable = load_arrow_table(Resource(data=\"https://example.com/data.arrow\"))\n\n# Load multiple files (concatenated)\ntable = load_arrow_table(Resource(data=[\"file1.arrow\", \"file2.arrow\"]))\n```\n\n### Saving Arrow Files\n\n```python\nfrom fairspec import save_arrow_table\n\n# Save with default options\nsave_arrow_table(table, path=\"output.arrow\")\n```\n\n## Advanced Features\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_arrow_table, Resource\n\n# Load from URL\ntable = load_arrow_table(Resource(data=\"https://example.com/data.arrow\"))\n\n# Load multiple remote files\ntable = load_arrow_table(Resource(data=[\n \"https://api.example.com/data-2023.arrow\",\n \"https://api.example.com/data-2024.arrow\",\n]))\n```","content/docs/python/arrow.md","4b7dadb95f869e56",{"html":479,"metadata":480},"\u003Cp>Apache Arrow IPC file handling with high-performance columnar data processing.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 
1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The Arrow plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_arrow_table\u003C/code> - Load Arrow IPC files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_arrow_table\u003C/code> - Save tables to Arrow IPC files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">ArrowPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_arrow_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># High-performance columnar format\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_arrow_table, Resourcetable = load_arrow_table(Resource(data="table.arrow"))# High-performance columnar format\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-arrow-files\">Loading Arrow Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-arrow-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 
0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading Arrow Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_arrow_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.arrow\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from remote URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan 
style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file1.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file2.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_arrow_table, Resource# Load from local filetable = load_arrow_table(Resource(data="data.arrow"))# Load from remote URLtable = load_arrow_table(Resource(data="https://example.com/data.arrow"))# Load multiple files (concatenated)table = load_arrow_table(Resource(data=["file1.arrow", "file2.arrow"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-arrow-files\">Saving Arrow Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-arrow-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 
1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving Arrow Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_arrow_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_arrow_table# Save with 
default optionssave_arrow_table(table, path="output.arrow")\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_arrow_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table 
\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_arrow_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.arrow\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_arrow_table, Resource# Load from URLtable = load_arrow_table(Resource(data="https://example.com/data.arrow"))# Load multiple remote filestable = load_arrow_table(Resource(data=[ "https://api.example.com/data-2023.arrow", 
"https://api.example.com/data-2024.arrow",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":481,"localImagePaths":500,"remoteImagePaths":501,"frontmatter":502,"imagePaths":504},[482,483,485,488,491,494,497],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},"getting-started",{"depth":31,"slug":486,"text":487},"basic-usage","Basic Usage",{"depth":74,"slug":489,"text":490},"loading-arrow-files","Loading Arrow Files",{"depth":74,"slug":492,"text":493},"saving-arrow-files","Saving Arrow Files",{"depth":31,"slug":495,"text":496},"advanced-features","Advanced Features",{"depth":74,"slug":498,"text":499},"remote-file-loading","Remote File Loading",[],[],{"title":469,"sidebar":503},{"label":473,"order":472},[],"python/csv",{"id":505,"data":507,"body":513,"filePath":514,"digest":515,"rendered":516},{"title":508,"editUrl":15,"head":509,"template":17,"sidebar":510,"pagefind":15,"draft":21},"Working with CSV in Python",[],{"order":19,"label":511,"hidden":21,"attrs":512},"CSV",{},"Comprehensive CSV file handling with automatic format detection, advanced header processing, and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe CSV plugin provides these capabilities:\n\n- `load_csv_table` - Load CSV/TSV files into tables\n- `save_csv_table` - Save tables to CSV/TSV files\n- `CsvPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_csv_table, Resource\n\ntable = load_csv_table(Resource(data=\"table.csv\"))\n# the column types will be automatically inferred\n# or you can provide a Table Schema\n```\n\n## Basic Usage\n\n### Loading CSV Files\n\n```python\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\n# Load a simple CSV file\ntable = load_csv_table(Resource(data=\"data.csv\"))\n\n# Load with custom format\ntable = load_csv_table(Resource(\n data=\"data.csv\",\n 
fileDialect=CsvFileDialect(\n delimiter=\";\",\n headerRows=[1],\n ),\n))\n\n# Load multiple CSV files (concatenated)\ntable = load_csv_table(Resource(data=[\"part1.csv\", \"part2.csv\", \"part3.csv\"]))\n```\n\n### Saving CSV Files\n\n```python\nfrom fairspec import save_csv_table\nfrom fairspec_metadata import CsvFileDialect\n\n# Save with default options\nsave_csv_table(table, path=\"output.csv\")\n\n# Save with custom format\nsave_csv_table(table, path=\"output.csv\", fileDialect=CsvFileDialect(\n delimiter=\"\\t\",\n quoteChar=\"'\",\n))\n\n# Save as TSV\nsave_csv_table(table, path=\"output.tsv\", fileDialect=CsvFileDialect(delimiter=\"\\t\"))\n```\n\n### Automatic Format Detection\n\n```python\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\n# Format is automatically detected when not specified\ntable = load_csv_table(Resource(data=\"unknown-dialect.csv\"))\n# The CSV plugin will automatically infer delimiter, quote characters, etc.\n\n# You can also explicitly specify the format if detection isn't accurate\ntable = load_csv_table(Resource(\n data=\"data.csv\",\n fileDialect=CsvFileDialect(\n delimiter=\",\",\n quoteChar='\"',\n headerRows=[1],\n ),\n))\n```\n\n## Advanced Features\n\n### Multi-Header Row Processing\n\n```python\n# CSV with multiple header rows:\n# Year,2023,2023,2024,2024\n# Quarter,Q1,Q2,Q1,Q2\n# Revenue,100,120,110,130\n\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\ntable = load_csv_table(Resource(\n data=\"multi-header.csv\",\n fileDialect=CsvFileDialect(\n headerRows=[1, 2],\n headerJoin=\"_\",\n ),\n))\n# Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\n```\n\n### Comment Row Handling\n\n```python\n# CSV with comment rows:\n# # This is a comment\n# # Generated on 2024-01-01\n# Name,Age,City\n# John,25,NYC\n\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import 
CsvFileDialect\n\ntable = load_csv_table(Resource(\n data=\"with-comments.csv\",\n fileDialect=CsvFileDialect(\n commentRows=[1, 2],\n headerRows=[3],\n ),\n))\n\n# Or use commentPrefix to skip lines starting with a specific character\ntable = load_csv_table(Resource(\n data=\"with-comments.csv\",\n fileDialect=CsvFileDialect(\n commentPrefix=\"#\",\n headerRows=[1],\n ),\n))\n```\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_csv_table, Resource\n\n# Load from URL\ntable = load_csv_table(Resource(data=\"https://example.com/data.csv\"))\n\n# Load multiple remote files\ntable = load_csv_table(Resource(data=[\n \"https://api.example.com/data-2023.csv\",\n \"https://api.example.com/data-2024.csv\",\n]))\n```","content/docs/python/csv.md","00de6329d9013f16",{"html":517,"metadata":518},"\u003Cp>Comprehensive CSV file handling with automatic format detection, advanced header processing, and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" 
src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The CSV plugin provides these capabilities:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_csv_table\u003C/code> - Load CSV/TSV files into 
tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_csv_table\u003C/code> - Save tables to CSV/TSV files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">CsvPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># the column types will be automatically inferred\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># or you can provide a Table Schema\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcetable = load_csv_table(Resource(data="table.csv"))# the column types will be automatically inferred# or you can provide a Table Schema\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-csv-files\">Loading CSV Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-csv-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 
2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading CSV Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a simple CSV file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan 
style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load with custom format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">;\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple CSV files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan 
style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part1.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part2.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part3.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialect# Load a simple CSV filetable = load_csv_table(Resource(data="data.csv"))# Load with custom formattable = load_csv_table(Resource( data="data.csv", fileDialect=CsvFileDialect( delimiter=";", headerRows=[1], ),))# Load multiple CSV files (concatenated)table = load_csv_table(Resource(data=["part1.csv", "part2.csv", "part3.csv"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-csv-files\">Saving CSV Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-csv-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 
0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving CSV Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_csv_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> 
\u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with custom format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">quoteChar\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save as TSV\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">delimiter\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_csv_tablefrom fairspec_metadata import CsvFileDialect# Save with default optionssave_csv_table(table, path="output.csv")# Save with custom formatsave_csv_table(table, path="output.csv", fileDialect=CsvFileDialect( delimiter="\\t", quoteChar="'",))# Save as TSVsave_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect(delimiter="\\t"))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"automatic-format-detection\">Automatic Format Detection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#automatic-format-detection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Automatic Format Detection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Format is automatically detected when not specified\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">unknown-dialect.csv\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># The CSV plugin will automatically infer delimiter, quote characters, etc.\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># You can also explicitly specify the format if detection isn't accurate\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">,\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">quoteChar\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">'\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialect# Format is 
automatically detected when not specifiedtable = load_csv_table(Resource(data="unknown-dialect.csv"))# The CSV plugin will automatically infer delimiter, quote characters, etc.# You can also explicitly specify the format if detection isn't accuratetable = load_csv_table(Resource( data="data.csv", fileDialect=CsvFileDialect( delimiter=",", quoteChar='"', headerRows=[1], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"multi-header-row-processing\">Multi-Header Row Processing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#multi-header-row-processing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 
3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Multi-Header Row Processing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># CSV with multiple header rows:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Year,2023,2023,2024,2024\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Quarter,Q1,Q2,Q1,Q2\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Revenue,100,120,110,130\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">multi-header.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">headerJoin\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">_\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"# CSV with multiple header rows:# Year,2023,2023,2024,2024# Quarter,Q1,Q2,Q1,Q2# Revenue,100,120,110,130from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialecttable = load_csv_table(Resource( data="multi-header.csv", fileDialect=CsvFileDialect( headerRows=[1, 2], headerJoin="_", ),))# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"comment-row-handling\">Comment Row Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#comment-row-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 
0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Comment Row Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># CSV with comment rows:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># # This is a comment\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># # Generated on 2024-01-01\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Name,Age,City\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># John,25,NYC\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Or use commentPrefix to skip lines starting with a specific character\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentPrefix\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">#\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"# CSV with comment rows:# # This is a comment# # Generated on 2024-01-01# Name,Age,City# John,25,NYCfrom fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialecttable = load_csv_table(Resource( data="with-comments.csv", fileDialect=CsvFileDialect( commentRows=[1, 2], headerRows=[3], ),))# Or use commentPrefix to skip lines starting with a specific charactertable = load_csv_table(Resource( data="with-comments.csv", fileDialect=CsvFileDialect( commentPrefix="#", headerRows=[1], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resource# Load from URLtable = load_csv_table(Resource(data="https://example.com/data.csv"))# Load multiple remote filestable = load_csv_table(Resource(data=[ "https://api.example.com/data-2023.csv", 
"https://api.example.com/data-2024.csv",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":519,"localImagePaths":540,"remoteImagePaths":541,"frontmatter":542,"imagePaths":544},[520,521,522,523,526,529,532,533,536,539],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":524,"text":525},"loading-csv-files","Loading CSV Files",{"depth":74,"slug":527,"text":528},"saving-csv-files","Saving CSV Files",{"depth":74,"slug":530,"text":531},"automatic-format-detection","Automatic Format Detection",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":534,"text":535},"multi-header-row-processing","Multi-Header Row Processing",{"depth":74,"slug":537,"text":538},"comment-row-handling","Comment Row Handling",{"depth":74,"slug":498,"text":499},[],[],{"title":508,"sidebar":543},{"label":511,"order":19},[],"python/inline",{"id":545,"data":547,"body":554,"filePath":555,"digest":556,"rendered":557},{"title":548,"editUrl":15,"head":549,"template":17,"sidebar":550,"pagefind":15,"draft":21},"Working with Inline Data tables in Python",[],{"order":551,"label":552,"hidden":21,"attrs":553},10,"Inline Data",{},"Inline data handling for tables embedded directly in resource definitions.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe Inline plugin provides:\n\n- `load_inline_table` - Load tables from inline data\n- `InlinePlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_inline_table, Resource\n\ntable = load_inline_table(Resource(data=[\n {\"id\": 1, \"name\": \"Alice\"},\n {\"id\": 2, \"name\": \"Bob\"},\n]))\n```\n\n## Basic Usage\n\n### Object Format Data\n\nThe most common format is an array of objects:\n\n```python\nfrom fairspec import load_inline_table, Resource\n\ntable = load_inline_table(Resource(data=[\n {\"id\": 1, \"name\": \"english\", \"native\": \"English\"},\n {\"id\": 2, \"name\": 
\"chinese\", \"native\": \"\\u4e2d\\u6587\"},\n {\"id\": 3, \"name\": \"spanish\", \"native\": \"Espa\\u00f1ol\"},\n]))\n```\n\n### Array Format Data\n\nYou can also use array-of-arrays format with the first row as headers:\n\n```python\nfrom fairspec import load_inline_table, Resource\n\ntable = load_inline_table(Resource(data=[\n [\"id\", \"name\", \"native\"],\n [1, \"english\", \"English\"],\n [2, \"chinese\", \"\\u4e2d\\u6587\"],\n [3, \"spanish\", \"Espa\\u00f1ol\"],\n]))\n```\n\n## Advanced Features\n\n### With Table Schema\n\nProvide a Table Schema for type validation and conversion:\n\n```python\nfrom fairspec import load_inline_table, Resource\nfrom fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty, BooleanColumnProperty\n\ntable = load_inline_table(Resource(\n data=[\n {\"id\": 1, \"name\": \"english\", \"active\": True},\n {\"id\": 2, \"name\": \"chinese\", \"active\": False},\n ],\n tableSchema=TableSchema(properties={\n \"id\": IntegerColumnProperty(),\n \"name\": StringColumnProperty(),\n \"active\": BooleanColumnProperty(),\n }),\n))\n```\n\n### Mixed with File Data\n\nInline data can be used alongside file-based resources in datasets:\n\n```python\nfrom fairspec import load_inline_table, load_csv_table, Resource\n\n# Load inline reference data\nlanguages = load_inline_table(Resource(\n name=\"languages\",\n data=[\n {\"id\": 1, \"name\": \"english\"},\n {\"id\": 2, \"name\": \"chinese\"},\n ],\n))\n\n# Load main data from file\nusers = load_csv_table(Resource(name=\"users\", data=\"users.csv\"))\n```\n\n### Resource Metadata\n\nYou can include metadata with inline data resources:\n\n```python\nfrom fairspec import load_inline_table, Resource\nfrom fairspec_metadata import TableSchema, StringColumnProperty\n\ntable = load_inline_table(Resource(\n name=\"countries\",\n title=\"Country Reference Data\",\n description=\"ISO country codes and names\",\n data=[\n {\"code\": \"US\", \"name\": \"United States\"},\n 
{\"code\": \"CN\", \"name\": \"China\"},\n {\"code\": \"ES\", \"name\": \"Spain\"},\n ],\n tableSchema=TableSchema(properties={\n \"code\": StringColumnProperty(),\n \"name\": StringColumnProperty(),\n }),\n))\n```","content/docs/python/inline.md","8bc06b15a76d4aaf",{"html":558,"metadata":559},"\u003Cp>Inline data handling for tables embedded directly in resource definitions.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The Inline plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_inline_table\u003C/code> - Load tables from inline data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">InlinePlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Alice\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Bob\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, Resourcetable = load_inline_table(Resource(data=[ {"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"},]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 
0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"object-format-data\">Object Format Data\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#object-format-data\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Object Format Data”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The most common format is an array of objects:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, 
Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">english\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">native\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">English\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">chinese\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">native\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\u4e2d\\u6587\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">spanish\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">native\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Espa\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\u00f1\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ol\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, 
Resourcetable = load_inline_table(Resource(data=[ {"id": 1, "name": "english", "native": "English"}, {"id": 2, "name": "chinese", "native": "\\u4e2d\\u6587"}, {"id": 3, "name": "spanish", "native": "Espa\\u00f1ol"},]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"array-format-data\">Array Format Data\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#array-format-data\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Array Format Data”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>You can also use array-of-arrays format with the first row as headers:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">native\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">english\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">English\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">chinese\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\u4e2d\\u6587\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">spanish\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Espa\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\u00f1\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">ol\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, Resourcetable = load_inline_table(Resource(data=[ ["id", "name", "native"], [1, "english", "English"], [2, "chinese", "\\u4e2d\\u6587"], [3, "spanish", "Espa\\u00f1ol"],]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"with-table-schema\">With Table Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#with-table-schema\">\u003Cspan 
aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “With Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Provide a Table Schema for type validation and conversion:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, IntegerColumnProperty, StringColumnProperty, BooleanColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">english\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan 
style=\"--0:#FF5874;--1:#A54A4A\">True\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">chinese\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">False\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableSchema\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">properties\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">BooleanColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, Resourcefrom fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty, BooleanColumnPropertytable = load_inline_table(Resource( data=[ {"id": 1, "name": "english", "active": True}, {"id": 2, "name": "chinese", "active": False}, ], tableSchema=TableSchema(properties={ "id": IntegerColumnProperty(), "name": StringColumnProperty(), "active": BooleanColumnProperty(), }),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"mixed-with-file-data\">Mixed with File Data\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#mixed-with-file-data\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan 
class=\"sr-only\">Section titled “Mixed with File Data”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Inline data can be used alongside file-based resources in datasets:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load inline reference data\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">languages \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">name\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">languages\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">english\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">chinese\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load main data from file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">users \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">name\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, load_csv_table, Resource# Load inline reference datalanguages = load_inline_table(Resource( name="languages", data=[ {"id": 1, "name": "english"}, {"id": 2, "name": "chinese"}, ],))# Load main data from fileusers = load_csv_table(Resource(name="users", data="users.csv"))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"resource-metadata\">Resource Metadata\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#resource-metadata\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Resource Metadata”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>You can include metadata with inline data resources:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre 
data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_inline_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, StringColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_inline_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">name\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">countries\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">title\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Country Reference Data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">description\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ISO country codes and names\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">code\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">US\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">United States\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">code\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">CN\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">China\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">code\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ES\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Spain\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableSchema\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">properties\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">code\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_inline_table, Resourcefrom fairspec_metadata import TableSchema, StringColumnPropertytable = load_inline_table(Resource( name="countries", title="Country Reference Data", description="ISO country codes and names", data=[ {"code": "US", "name": "United States"}, {"code": "CN", "name": "China"}, {"code": "ES", "name": "Spain"}, ], tableSchema=TableSchema(properties={ "code": StringColumnProperty(), "name": StringColumnProperty(), 
}),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":560,"localImagePaths":580,"remoteImagePaths":581,"frontmatter":582,"imagePaths":584},[561,562,563,564,567,570,571,574,577],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":565,"text":566},"object-format-data","Object Format Data",{"depth":74,"slug":568,"text":569},"array-format-data","Array Format Data",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":572,"text":573},"with-table-schema","With Table Schema",{"depth":74,"slug":575,"text":576},"mixed-with-file-data","Mixed with File Data",{"depth":74,"slug":578,"text":579},"resource-metadata","Resource Metadata",[],[],{"title":548,"sidebar":583},{"label":552,"order":551},[],"python/json",{"id":585,"data":587,"body":593,"filePath":594,"digest":595,"rendered":596},{"title":588,"editUrl":15,"head":589,"template":17,"sidebar":590,"pagefind":15,"draft":21},"Working with JSON tables in Python",[],{"order":74,"label":591,"hidden":21,"attrs":592},"JSON",{},"JSON file handling with automatic format detection and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe JSON plugin provides:\n\n- `load_json_table` - Load JSON files into tables\n- `save_json_table` - Save tables to JSON files\n- `JsonPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_json_table, Resource\n\ntable = load_json_table(Resource(data=\"table.json\"))\n# Standard JSON array of objects format\n```\n\n## Basic Usage\n\n### Loading JSON Files\n\n```python\nfrom fairspec import load_json_table, Resource\n\n# Load from local file\ntable = load_json_table(Resource(data=\"data.json\"))\n\n# Load from remote URL\ntable = load_json_table(Resource(data=\"https://example.com/data.json\"))\n\n# Load multiple files (concatenated)\ntable = load_json_table(Resource(data=[\"file1.json\", 
\"file2.json\"]))\n```\n\n### Saving JSON Files\n\n```python\nfrom fairspec import save_json_table\nfrom fairspec_metadata import JsonFileDialect\n\n# Save with default options\nsave_json_table(table, path=\"output.json\")\n\n# Save with explicit format\nsave_json_table(table, path=\"output.json\", fileDialect=JsonFileDialect())\n```\n\n## Standard Format\n\nJSON tables use an array of objects format:\n\n```json\n[\n {\"id\": 1, \"name\": \"Alice\", \"age\": 30},\n {\"id\": 2, \"name\": \"Bob\", \"age\": 25}\n]\n```\n\n## Advanced Features\n\n### JSON Pointer Extraction\n\nExtract data from nested objects using `jsonPointer`:\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Input: {\"users\": [{\"id\": 1, \"name\": \"Alice\"}]}\ntable = load_json_table(Resource(\n data=\"data.json\",\n fileDialect=JsonFileDialect(jsonPointer=\"users\"),\n))\n```\n\n### Column Selection\n\nSelect specific columns using `columnNames`:\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Only load specific columns\ntable = load_json_table(Resource(\n data=\"data.json\",\n fileDialect=JsonFileDialect(columnNames=[\"name\", \"age\"]),\n))\n```\n\n### Array Format Handling\n\nHandle CSV-style array data with `rowType: \"array\"`:\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Input: [[\"id\", \"name\"], [1, \"Alice\"], [2, \"Bob\"]]\ntable = load_json_table(Resource(\n data=\"data.json\",\n fileDialect=JsonFileDialect(rowType=\"array\"),\n))\n```\n\n### Saving with JSON Pointer\n\nWrap data in a nested structure when saving:\n\n```python\nfrom fairspec import save_json_table\nfrom fairspec_metadata import JsonFileDialect\n\n# Output: {\"users\": [{\"id\": 1, \"name\": \"Alice\"}]}\nsave_json_table(table, path=\"output.json\", fileDialect=JsonFileDialect(\n 
jsonPointer=\"users\",\n))\n```","content/docs/python/json.md","012fa02ac585b3d2",{"html":597,"metadata":598},"\u003Cp>JSON file handling with automatic format detection and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv 
aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The JSON plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_json_table\u003C/code> - Load JSON files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_json_table\u003C/code> - Save tables to JSON files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">JsonPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, 
Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Standard JSON array of objects format\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcetable = load_json_table(Resource(data="table.json"))# Standard JSON array of objects format\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 
6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-json-files\">Loading JSON Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-json-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading JSON Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from remote URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file1.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file2.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resource# Load from local filetable = load_json_table(Resource(data="data.json"))# 
Load from remote URLtable = load_json_table(Resource(data="https://example.com/data.json"))# Load multiple files (concatenated)table = load_json_table(Resource(data=["file1.json", "file2.json"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-json-files\">Saving JSON Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-json-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving JSON Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_json_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with explicit format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">())\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_json_tablefrom fairspec_metadata import JsonFileDialect# Save with default optionssave_json_table(table, path="output.json")# Save with explicit formatsave_json_table(table, path="output.json", fileDialect=JsonFileDialect())\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"standard-format\">Standard Format\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#standard-format\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Standard Format”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>JSON tables use an array of objects format:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre 
data-language=\"json\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Alice\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">30\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">},\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Bob\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">25\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"[ {"id": 1, "name": "Alice", "age": 30}, {"id": 2, "name": "Bob", "age": 25}]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"json-pointer-extraction\">JSON Pointer Extraction\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#json-pointer-extraction\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg 
width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “JSON Pointer Extraction”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Extract data from nested objects using \u003Ccode dir=\"auto\">jsonPointer\u003C/code>:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Input: {\"users\": [{\"id\": 1, \"name\": \"Alice\"}]}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">jsonPointer\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" 
data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Input: {"users": [{"id": 1, "name": "Alice"}]}table = load_json_table(Resource( data="data.json", fileDialect=JsonFileDialect(jsonPointer="users"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"column-selection\">Column Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#column-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Column Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Select specific columns using \u003Ccode dir=\"auto\">columnNames\u003C/code>:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Only load specific columns\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">columnNames\u003C/span>\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Only load specific columnstable = load_json_table(Resource( data="data.json", fileDialect=JsonFileDialect(columnNames=["name", "age"]),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"array-format-handling\">Array Format Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#array-format-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 
0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Array Format Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Handle CSV-style array data with \u003Ccode dir=\"auto\">rowType: \"array\"\u003C/code>:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Input: [[\"id\", \"name\"], [1, \"Alice\"], [2, \"Bob\"]]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">rowType\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">array\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Input: [["id", "name"], [1, "Alice"], [2, "Bob"]]table = load_json_table(Resource( data="data.json", fileDialect=JsonFileDialect(rowType="array"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv 
class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-with-json-pointer\">Saving with JSON Pointer\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-with-json-pointer\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving with JSON Pointer”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Wrap data in a nested structure when saving:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_json_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># Output: {\"users\": [{\"id\": 1, \"name\": \"Alice\"}]}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.json\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">jsonPointer\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import 
save_json_tablefrom fairspec_metadata import JsonFileDialect# Output: {"users": [{"id": 1, "name": "Alice"}]}save_json_table(table, path="output.json", fileDialect=JsonFileDialect( jsonPointer="users",))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":599,"localImagePaths":625,"remoteImagePaths":626,"frontmatter":627,"imagePaths":629},[600,601,602,603,606,609,612,613,616,619,622],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":604,"text":605},"loading-json-files","Loading JSON Files",{"depth":74,"slug":607,"text":608},"saving-json-files","Saving JSON Files",{"depth":31,"slug":610,"text":611},"standard-format","Standard Format",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":614,"text":615},"json-pointer-extraction","JSON Pointer Extraction",{"depth":74,"slug":617,"text":618},"column-selection","Column Selection",{"depth":74,"slug":620,"text":621},"array-format-handling","Array Format Handling",{"depth":74,"slug":623,"text":624},"saving-with-json-pointer","Saving with JSON Pointer",[],[],{"title":588,"sidebar":628},{"label":591,"order":74},[],"python/jsonl",{"id":630,"data":632,"body":638,"filePath":639,"digest":640,"rendered":641},{"title":633,"editUrl":15,"head":634,"template":17,"sidebar":635,"pagefind":15,"draft":21},"Working with JSONL tables in Python",[],{"order":57,"label":636,"hidden":21,"attrs":637},"JSONL",{},"JSONL (JSON Lines) file handling with automatic format detection and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe JSONL format is handled by the JSON plugin, which provides:\n\n- `load_json_table` - Load JSONL files into tables\n- `save_json_table` - Save tables to JSONL files\n- `JsonPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_json_table, Resource\n\ntable = 
load_json_table(Resource(data=\"table.jsonl\"))\n# Newline-delimited JSON objects\n```\n\n## Basic Usage\n\n### Loading JSONL Files\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Load from local file\ntable = load_json_table(Resource(data=\"data.jsonl\"))\n\n# Load with explicit format\ntable = load_json_table(Resource(\n data=\"data.jsonl\",\n fileDialect=JsonFileDialect(format=\"jsonl\"),\n))\n\n# Load multiple files (concatenated)\ntable = load_json_table(Resource(data=[\"part1.jsonl\", \"part2.jsonl\"]))\n```\n\n### Saving JSONL Files\n\n```python\nfrom fairspec import save_json_table\nfrom fairspec_metadata import JsonFileDialect\n\n# Save as JSONL\nsave_json_table(table, path=\"output.jsonl\", fileDialect=JsonFileDialect(format=\"jsonl\"))\n```\n\n## Standard Format\n\nJSONL uses newline-delimited JSON objects:\n\n```jsonl\n{\"id\": 1, \"name\": \"Alice\", \"age\": 30}\n{\"id\": 2, \"name\": \"Bob\", \"age\": 25}\n{\"id\": 3, \"name\": \"Charlie\", \"age\": 35}\n```\n\n## Advanced Features\n\n### Column Selection\n\nSelect specific columns using `columnNames`:\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Only load specific columns\ntable = load_json_table(Resource(\n data=\"data.jsonl\",\n fileDialect=JsonFileDialect(format=\"jsonl\", columnNames=[\"name\", \"age\"]),\n))\n```\n\n### Array Format Handling\n\nHandle CSV-style array data with `rowType: \"array\"`:\n\n```python\nfrom fairspec import load_json_table, Resource\nfrom fairspec_metadata import JsonFileDialect\n\n# Input JSONL with arrays:\n# [\"id\", \"name\"]\n# [1, \"Alice\"]\n# [2, \"Bob\"]\n\ntable = load_json_table(Resource(\n data=\"data.jsonl\",\n fileDialect=JsonFileDialect(format=\"jsonl\", rowType=\"array\"),\n))\n```\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_json_table, Resource\n\n# Load from URL\ntable = 
load_json_table(Resource(data=\"https://example.com/data.jsonl\"))\n\n# Load multiple remote files\ntable = load_json_table(Resource(data=[\n \"https://api.example.com/logs-2023.jsonl\",\n \"https://api.example.com/logs-2024.jsonl\",\n]))\n```","content/docs/python/jsonl.md","385c18f8b4e385a5",{"html":642,"metadata":643},"\u003Cp>JSONL (JSON Lines) file handling with automatic format detection and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The JSONL format is handled by the JSON plugin, which provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_json_table\u003C/code> - Load JSONL files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_json_table\u003C/code> - Save tables to JSONL files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">JsonPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Newline-delimited JSON objects\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcetable = load_json_table(Resource(data="table.jsonl"))# Newline-delimited JSON objects\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" 
class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-jsonl-files\">Loading JSONL Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-jsonl-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading JSONL Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load with explicit format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple files 
(concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part1.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part2.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Load from local filetable = load_json_table(Resource(data="data.jsonl"))# Load with explicit formattable = load_json_table(Resource( data="data.jsonl", fileDialect=JsonFileDialect(format="jsonl"),))# Load multiple files (concatenated)table = load_json_table(Resource(data=["part1.jsonl", "part2.jsonl"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 
id=\"saving-jsonl-files\">Saving JSONL Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-jsonl-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving JSONL Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_json_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save as JSONL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_json_tablefrom fairspec_metadata import JsonFileDialect# Save as JSONLsave_json_table(table, path="output.jsonl", fileDialect=JsonFileDialect(format="jsonl"))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"standard-format\">Standard Format\u003C/h2>\u003Ca class=\"sl-anchor-link\" 
href=\"#standard-format\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Standard Format”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>JSONL uses newline-delimited JSON objects:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"jsonl\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Alice\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">30\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Bob\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">25\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">{\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"id\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"name\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#C789D6;--1:#7F5889\">Charlie\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">, \u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">\"age\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">: \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">35\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">}\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv 
aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"{"id": 1, "name": "Alice", "age": 30}{"id": 2, "name": "Bob", "age": 25}{"id": 3, "name": "Charlie", "age": 35}\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"column-selection\">Column Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#column-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Column Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Select specific columns using \u003Ccode dir=\"auto\">columnNames\u003C/code>:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Only load specific columns\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">columnNames\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Only load specific columnstable = load_json_table(Resource( data="data.jsonl", fileDialect=JsonFileDialect(format="jsonl", columnNames=["name", "age"]),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"array-format-handling\">Array Format Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#array-format-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Array Format Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Handle CSV-style array data with \u003Ccode dir=\"auto\">rowType: \"array\"\u003C/code>:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> JsonFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Input JSONL with arrays:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># [\"id\", \"name\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># [1, \"Alice\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># [2, \"Bob\"]\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">JsonFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">rowType\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">array\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resourcefrom fairspec_metadata import JsonFileDialect# Input JSONL with arrays:# ["id", "name"]# [1, "Alice"]# [2, "Bob"]table = load_json_table(Resource( data="data.jsonl", 
fileDialect=JsonFileDialect(format="jsonl", rowType="array"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_json_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_json_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/logs-2023.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/logs-2024.jsonl\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_json_table, Resource# Load from URLtable = load_json_table(Resource(data="https://example.com/data.jsonl"))# Load multiple remote filestable = load_json_table(Resource(data=[ "https://api.example.com/logs-2023.jsonl", "https://api.example.com/logs-2024.jsonl",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":644,"localImagePaths":659,"remoteImagePaths":660,"frontmatter":661,"imagePaths":663},[645,646,647,648,651,654,655,656,657,658],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":649,"text":650},"loading-jsonl-files","Loading JSONL Files",{"depth":74,"slug":652,"text":653},"saving-jsonl-files","Saving JSONL 
Files",{"depth":31,"slug":610,"text":611},{"depth":31,"slug":495,"text":496},{"depth":74,"slug":617,"text":618},{"depth":74,"slug":620,"text":621},{"depth":74,"slug":498,"text":499},[],[],{"title":633,"sidebar":662},{"label":636,"order":57},[],"python/jupyter",{"id":664,"data":666,"body":673,"filePath":674,"assetImports":675,"digest":677,"rendered":678},{"title":667,"editUrl":15,"head":668,"template":17,"sidebar":669,"pagefind":15,"draft":21},"Using Fairspec Python in Jupyter Notebooks",[],{"order":670,"label":671,"hidden":21,"attrs":672},12,"Jupyter Notebooks",{},"For data scientists and data engineers, [Jupyter Notebooks](https://docs.jupyter.org/en/latest/) provide a powerful and flexible environment for exploring, visualizing, and analyzing data.\n\n## Installation\n\n1. **Install Jupyter:** `pip install jupyterlab` - Installs Jupyter Notebook, a web-based interactive computing environment for data science and data engineering. You can use another UI such as Jupyter CLI or Jupyter Desktop.\n\n1. **Install Fairspec:** `pip install fairspec` - Installs the Fairspec Python framework and all its dependencies.\n\n## Usage\n\n1. **Run Jupyter Notebooks:** `jupyter-lab` - Launches the Jupyter Notebook server in the current working directory, which allows you to create and run Jupyter notebooks.\n\n1. **Select Python Kernel in Notebook:** Choose the Python kernel from your notebook's kernel selection menu. 
VS Code users may need to install the default Jupyter kernel extensions.\n\n![Fairspec Python in Jupyter Notebooks](./assets/jupyter.png)\n\n## References\n\n- [Jupyter Documentation](https://docs.jupyter.org/en/latest/)\n- [JupyterLab Documentation](https://jupyterlab.readthedocs.io/en/latest/)","content/docs/python/jupyter.md",[676],"./assets/jupyter.png","0fae8ea4ef4e39f6",{"html":679,"metadata":680},"\u003Cp>For data scientists and data engineers, \u003Ca href=\"https://docs.jupyter.org/en/latest/\">Jupyter Notebooks\u003C/a> provide a powerful and flexible environment for exploring, visualizing, and analyzing data.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Col>\n\u003Cli>\n\u003Cp>\u003Cstrong>Install Jupyter:\u003C/strong> \u003Ccode dir=\"auto\">pip install jupyterlab\u003C/code> - Installs Jupyter Notebook, a web-based interactive computing environment for data science and data engineering. 
You can use another UI such as Jupyter CLI or Jupyter Desktop.\u003C/p>\n\u003C/li>\n\u003Cli>\n\u003Cp>\u003Cstrong>Install Fairspec:\u003C/strong> \u003Ccode dir=\"auto\">pip install fairspec\u003C/code> - Installs the Fairspec Python framework and all its dependencies.\u003C/p>\n\u003C/li>\n\u003C/ol>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"usage\">Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Col>\n\u003Cli>\n\u003Cp>\u003Cstrong>Run Jupyter Notebooks:\u003C/strong> \u003Ccode dir=\"auto\">jupyter-lab\u003C/code> - Launches the Jupyter Notebook server in the current working directory, which allows you to create and run Jupyter notebooks.\u003C/p>\n\u003C/li>\n\u003Cli>\n\u003Cp>\u003Cstrong>Select Python Kernel in Notebook:\u003C/strong> Choose the Python kernel from your notebook’s kernel selection menu. 
VS Code users may need to install the default Jupyter kernel extensions.\u003C/p>\n\u003C/li>\n\u003C/ol>\n\u003Cp>\u003Cimg __ASTRO_IMAGE_=\"{"src":"./assets/jupyter.png","alt":"Fairspec Python in Jupyter Notebooks","index":0}\">\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"references\">References\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#references\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “References”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ca href=\"https://docs.jupyter.org/en/latest/\">Jupyter Documentation\u003C/a>\u003C/li>\n\u003Cli>\u003Ca href=\"https://jupyterlab.readthedocs.io/en/latest/\">JupyterLab Documentation\u003C/a>\u003C/li>\n\u003C/ul>",{"headings":681,"localImagePaths":689,"remoteImagePaths":690,"frontmatter":691,"imagePaths":693},[682,683,686],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":684,"text":685},"usage","Usage",{"depth":31,"slug":687,"text":688},"references","References",[676],[],{"title":667,"sidebar":692},{"label":671,"order":670},[676],"python/ods",{"id":694,"data":696,"body":703,"filePath":704,"digest":705,"rendered":706},{"title":697,"editUrl":15,"head":698,"template":17,"sidebar":699,"pagefind":15,"draft":21},"Working with ODS in Python",[],{"order":700,"label":701,"hidden":21,"attrs":702},6,"ODS",{},"OpenDocument Spreadsheet (ODS) 
file handling with sheet selection, advanced header processing, and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nODS format is handled by the XLSX plugin, which provides:\n\n- `load_xlsx_table` - Load ODS files into tables\n- `save_xlsx_table` - Save tables to ODS files\n- `XlsxPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_xlsx_table, Resource\n\ntable = load_xlsx_table(Resource(data=\"table.ods\"))\n# the column types will be automatically inferred\n```\n\n## Basic Usage\n\n### Loading ODS Files\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Load a simple ODS file\ntable = load_xlsx_table(Resource(data=\"data.ods\"))\n\n# Load with custom format (specify sheet)\ntable = load_xlsx_table(Resource(\n data=\"data.ods\",\n fileDialect=XlsxFileDialect(format=\"ods\", sheetName=\"Sheet2\"),\n))\n\n# Load multiple ODS files (concatenated)\ntable = load_xlsx_table(Resource(data=[\"part1.ods\", \"part2.ods\", \"part3.ods\"]))\n```\n\n### Saving ODS Files\n\n```python\nfrom fairspec import save_xlsx_table\nfrom fairspec_metadata import XlsxFileDialect\n\n# Save with default options\nsave_xlsx_table(table, path=\"output.ods\", fileDialect=XlsxFileDialect(format=\"ods\"))\n\n# Save with custom sheet name\nsave_xlsx_table(table, path=\"output.ods\", fileDialect=XlsxFileDialect(\n format=\"ods\",\n sheetName=\"Data\",\n))\n```\n\n## Advanced Features\n\n### Sheet Selection\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Select by sheet number (1-indexed)\ntable = load_xlsx_table(Resource(\n data=\"workbook.ods\",\n fileDialect=XlsxFileDialect(format=\"ods\", sheetNumber=2),\n))\n\n# Select by sheet name\ntable = load_xlsx_table(Resource(\n data=\"workbook.ods\",\n fileDialect=XlsxFileDialect(format=\"ods\", sheetName=\"Sales 
Data\"),\n))\n```\n\n### Multi-Header Row Processing\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# ODS with multiple header rows\ntable = load_xlsx_table(Resource(\n data=\"multi-header.ods\",\n fileDialect=XlsxFileDialect(\n format=\"ods\",\n headerRows=[1, 2],\n headerJoin=\"_\",\n ),\n))\n# Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\n```\n\n### Comment Row Handling\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Skip specific comment rows\ntable = load_xlsx_table(Resource(\n data=\"with-comments.ods\",\n fileDialect=XlsxFileDialect(\n format=\"ods\",\n commentRows=[1, 2],\n headerRows=[3],\n ),\n))\n\n# Skip rows with comment prefix\ntable = load_xlsx_table(Resource(\n data=\"data.ods\",\n fileDialect=XlsxFileDialect(\n format=\"ods\",\n commentPrefix=\"#\",\n headerRows=[1],\n ),\n))\n```\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_xlsx_table, Resource\n\n# Load from URL\ntable = load_xlsx_table(Resource(data=\"https://example.com/data.ods\"))\n\n# Load multiple remote files\ntable = load_xlsx_table(Resource(data=[\n \"https://api.example.com/data-2023.ods\",\n \"https://api.example.com/data-2024.ods\",\n]))\n```\n\n### Column Selection\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Select specific columns\ntable = load_xlsx_table(Resource(\n data=\"data.ods\",\n fileDialect=XlsxFileDialect(format=\"ods\", columnNames=[\"name\", \"age\", \"city\"]),\n))\n```","content/docs/python/ods.md","a8ce023fcd196f7f",{"html":707,"metadata":708},"\u003Cp>OpenDocument Spreadsheet (ODS) file handling with sheet selection, advanced header processing, and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca 
class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" 
href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>ODS format is handled by the XLSX plugin, which provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_xlsx_table\u003C/code> - Load ODS files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_xlsx_table\u003C/code> - Save tables to ODS files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">XlsxPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># the column types will be automatically inferred\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcetable = load_xlsx_table(Resource(data="table.ods"))# the column types will be automatically inferred\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 
0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-ods-files\">Loading ODS Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-ods-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading ODS Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a simple ODS file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load with custom format (specify sheet)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sheetName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Sheet2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple ODS files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part1.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part2.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part3.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Load a simple ODS filetable = load_xlsx_table(Resource(data="data.ods"))# Load with custom format (specify sheet)table = load_xlsx_table(Resource( data="data.ods", fileDialect=XlsxFileDialect(format="ods", sheetName="Sheet2"),))# Load multiple ODS files 
(concatenated)table = load_xlsx_table(Resource(data=["part1.ods", "part2.ods", "part3.ods"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-ods-files\">Saving ODS Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-ods-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving ODS Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_xlsx_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with custom sheet name\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">save_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">format\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sheetName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_xlsx_tablefrom fairspec_metadata import XlsxFileDialect# Save with default optionssave_xlsx_table(table, path="output.ods", fileDialect=XlsxFileDialect(format="ods"))# Save with custom sheet namesave_xlsx_table(table, path="output.ods", fileDialect=XlsxFileDialect( format="ods", sheetName="Data",))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"sheet-selection\">Sheet Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#sheet-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 
3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Sheet Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Select by sheet number (1-indexed)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">workbook.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sheetNumber\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#809191;--1:#616671\"># Select by sheet name\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">workbook.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sheetName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Sales Data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Select by sheet number (1-indexed)table = load_xlsx_table(Resource( data="workbook.ods", fileDialect=XlsxFileDialect(format="ods", sheetNumber=2),))# Select by sheet nametable = load_xlsx_table(Resource( data="workbook.ods", fileDialect=XlsxFileDialect(format="ods", sheetName="Sales Data"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"multi-header-row-processing\">Multi-Header Row Processing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#multi-header-row-processing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Multi-Header Row 
Processing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># ODS with multiple header rows\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">multi-header.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">format\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerJoin\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">_\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# ODS with multiple header rowstable = load_xlsx_table(Resource( data="multi-header.ods", fileDialect=XlsxFileDialect( format="ods", headerRows=[1, 2], headerJoin="_", ),))# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"comment-row-handling\">Comment Row Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#comment-row-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 
0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Comment Row Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Skip specific comment rows\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">format\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan 
style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Skip rows with comment prefix\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">format\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentPrefix\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">#\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to 
clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Skip specific comment rowstable = load_xlsx_table(Resource( data="with-comments.ods", fileDialect=XlsxFileDialect( format="ods", commentRows=[1, 2], headerRows=[3], ),))# Skip rows with comment prefixtable = load_xlsx_table(Resource( data="data.ods", fileDialect=XlsxFileDialect( format="ods", commentPrefix="#", headerRows=[1], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, 
Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resource# Load from URLtable = load_xlsx_table(Resource(data="https://example.com/data.ods"))# Load multiple remote filestable = load_xlsx_table(Resource(data=[ "https://api.example.com/data-2023.ods", "https://api.example.com/data-2024.ods",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"column-selection\">Column Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#column-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 
0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Column Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Select specific columns\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">format\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">ods\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">columnNames\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">city\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Select specific columnstable = load_xlsx_table(Resource( data="data.ods", fileDialect=XlsxFileDialect(format="ods", columnNames=["name", "age", "city"]),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":709,"localImagePaths":727,"remoteImagePaths":728,"frontmatter":729,"imagePaths":731},[710,711,712,713,716,719,720,723,724,725,726],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":714,"text":715},"loading-ods-files","Loading ODS Files",{"depth":74,"slug":717,"text":718},"saving-ods-files","Saving ODS Files",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":721,"text":722},"sheet-selection","Sheet 
Selection",{"depth":74,"slug":534,"text":535},{"depth":74,"slug":537,"text":538},{"depth":74,"slug":498,"text":499},{"depth":74,"slug":617,"text":618},[],[],{"title":697,"sidebar":730},{"label":701,"order":700},[],"python/parquet",{"id":732,"data":734,"body":741,"filePath":742,"digest":743,"rendered":744},{"title":735,"editUrl":15,"head":736,"template":17,"sidebar":737,"pagefind":15,"draft":21},"Working with Parquet in Python",[],{"order":738,"label":739,"hidden":21,"attrs":740},8,"Parquet",{},"Apache Parquet file handling with high-performance columnar data processing and compression.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe Parquet plugin provides:\n\n- `load_parquet_table` - Load Parquet files into tables\n- `save_parquet_table` - Save tables to Parquet files\n- `ParquetPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_parquet_table, Resource\n\ntable = load_parquet_table(Resource(data=\"table.parquet\"))\n# Efficient columnar format with compression\n```\n\n## Basic Usage\n\n### Loading Parquet Files\n\n```python\nfrom fairspec import load_parquet_table, Resource\n\n# Load from local file\ntable = load_parquet_table(Resource(data=\"data.parquet\"))\n\n# Load from remote URL\ntable = load_parquet_table(Resource(data=\"https://example.com/data.parquet\"))\n\n# Load multiple files (concatenated)\ntable = load_parquet_table(Resource(data=[\"file1.parquet\", \"file2.parquet\"]))\n```\n\n### Saving Parquet Files\n\n```python\nfrom fairspec import save_parquet_table\n\n# Save with default options\nsave_parquet_table(table, path=\"output.parquet\")\n```\n\n## Advanced Features\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_parquet_table, Resource\n\n# Load from URL\ntable = load_parquet_table(Resource(data=\"https://example.com/data.parquet\"))\n\n# Load multiple remote files\ntable = load_parquet_table(Resource(data=[\n 
\"https://api.example.com/data-2023.parquet\",\n \"https://api.example.com/data-2024.parquet\",\n]))\n```","content/docs/python/parquet.md","4d54eb9598b7c641",{"html":745,"metadata":746},"\u003Cp>Apache Parquet file handling with high-performance columnar data processing and compression.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The Parquet plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_parquet_table\u003C/code> - Load Parquet files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_parquet_table\u003C/code> - Save tables to Parquet files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">ParquetPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_parquet_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Efficient columnar format with compression\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_parquet_table, Resourcetable = load_parquet_table(Resource(data="table.parquet"))# Efficient columnar format with compression\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 
24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-parquet-files\">Loading Parquet Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-parquet-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading Parquet Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> load_parquet_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from local file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from remote URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan 
style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file1.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">file2.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv 
aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_parquet_table, Resource# Load from local filetable = load_parquet_table(Resource(data="data.parquet"))# Load from remote URLtable = load_parquet_table(Resource(data="https://example.com/data.parquet"))# Load multiple files (concatenated)table = load_parquet_table(Resource(data=["file1.parquet", "file2.parquet"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-parquet-files\">Saving Parquet Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-parquet-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving Parquet Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_parquet_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_parquet_table# Save with default optionssave_parquet_table(table, path="output.parquet")\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 
0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_parquet_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_parquet_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.parquet\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_parquet_table, Resource# Load from URLtable = load_parquet_table(Resource(data="https://example.com/data.parquet"))# Load multiple remote filestable = load_parquet_table(Resource(data=[ "https://api.example.com/data-2023.parquet", "https://api.example.com/data-2024.parquet",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":747,"localImagePaths":759,"remoteImagePaths":760,"frontmatter":761,"imagePaths":763},[748,749,750,751,754,757,758],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":752,"text":753},"loading-parquet-files","Loading Parquet Files",{"depth":74,"slug":755,"text":756},"saving-parquet-files","Saving Parquet 
Files",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":498,"text":499},[],[],{"title":735,"sidebar":762},{"label":739,"order":738},[],"python/sqlite",{"id":764,"data":766,"body":773,"filePath":774,"digest":775,"rendered":776},{"title":767,"editUrl":15,"head":768,"template":17,"sidebar":769,"pagefind":15,"draft":21},"Working with SQLite in Python",[],{"order":770,"label":771,"hidden":21,"attrs":772},9,"SQLite",{},"SQLite database file handling with table loading and saving capabilities.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe SQLite plugin provides:\n\n- `load_sqlite_table` - Load tables from SQLite databases\n- `save_sqlite_table` - Save tables to SQLite databases\n- `SqlitePlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_sqlite_table, Resource\nfrom fairspec_metadata import SqliteFileDialect\n\ntable = load_sqlite_table(Resource(\n data=\"database.db\",\n fileDialect=SqliteFileDialect(tableName=\"users\"),\n))\n# column types will be automatically inferred from database schema\n```\n\n## Basic Usage\n\n### Loading SQLite Tables\n\n```python\nfrom fairspec import load_sqlite_table, Resource\nfrom fairspec_metadata import SqliteFileDialect\n\n# Load a table from SQLite database\ntable = load_sqlite_table(Resource(\n data=\"data.db\",\n fileDialect=SqliteFileDialect(tableName=\"products\"),\n))\n\n# Load from a specific path\ntable = load_sqlite_table(Resource(\n data=\"/path/to/database.db\",\n fileDialect=SqliteFileDialect(tableName=\"orders\"),\n))\n```\n\n### Saving SQLite Tables\n\n```python\nfrom fairspec import save_sqlite_table\nfrom fairspec_metadata import SqliteFileDialect\n\n# Save table to SQLite database\nsave_sqlite_table(table, path=\"output.db\", fileDialect=SqliteFileDialect(\n tableName=\"results\",\n))\n\n# Overwrite existing table\nsave_sqlite_table(table, path=\"output.db\", fileDialect=SqliteFileDialect(\n tableName=\"results\",\n), 
overwrite=True)\n```\n\n## Advanced Features\n\n### Schema Inference\n\nTable schemas are automatically inferred from SQLite table definitions:\n\n```python\nfrom fairspec import load_sqlite_table, Resource\nfrom fairspec_metadata import SqliteFileDialect\n\n# Field types are automatically detected from database schema\ntable = load_sqlite_table(Resource(\n data=\"shop.db\",\n fileDialect=SqliteFileDialect(tableName=\"products\"),\n))\n# Types like INTEGER, TEXT, REAL are mapped to appropriate Table Schema types\n```\n\n### Creating New Tables\n\nWhen saving, the plugin automatically creates the table structure:\n\n```python\nfrom fairspec import save_sqlite_table\nfrom fairspec_metadata import SqliteFileDialect\n\n# Creates a new database file with the specified table\nsave_sqlite_table(table, path=\"new-database.db\", fileDialect=SqliteFileDialect(\n tableName=\"my_data\",\n))\n```\n\n### Working with Table Schema\n\nYou can provide a custom Table Schema when saving:\n\n```python\nfrom fairspec import save_sqlite_table\nfrom fairspec_metadata import SqliteFileDialect, TableSchema, IntegerColumnProperty, StringColumnProperty\n\nsave_sqlite_table(table, path=\"output.db\", fileDialect=SqliteFileDialect(\n tableName=\"customers\",\n), tableSchema=TableSchema(properties={\n \"id\": IntegerColumnProperty(),\n \"name\": StringColumnProperty(),\n \"email\": StringColumnProperty(),\n}))\n```","content/docs/python/sqlite.md","220edd15001fea47",{"html":777,"metadata":778},"\u003Cp>SQLite database file handling with table loading and saving capabilities.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 
3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 
0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The SQLite plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_sqlite_table\u003C/code> - Load tables from SQLite databases\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_sqlite_table\u003C/code> - Save tables to SQLite databases\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">SqlitePlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_sqlite_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">database.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">tableName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">users\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># column types will be automatically inferred from database schema\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv 
class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_sqlite_table, Resourcefrom fairspec_metadata import SqliteFileDialecttable = load_sqlite_table(Resource( data="database.db", fileDialect=SqliteFileDialect(tableName="users"),))# column types will be automatically inferred from database schema\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-sqlite-tables\">Loading SQLite Tables\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-sqlite-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 
0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading SQLite Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_sqlite_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a table from SQLite database\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">tableName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">products\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from a specific path\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">/path/to/database.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">tableName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">orders\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_sqlite_table, Resourcefrom fairspec_metadata import SqliteFileDialect# Load a table from SQLite databasetable = load_sqlite_table(Resource( data="data.db", fileDialect=SqliteFileDialect(tableName="products"),))# Load from a specific 
pathtable = load_sqlite_table(Resource( data="/path/to/database.db", fileDialect=SqliteFileDialect(tableName="orders"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-sqlite-tables\">Saving SQLite Tables\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-sqlite-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving SQLite Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_sqlite_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save table to SQLite database\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">results\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Overwrite existing table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">results\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">overwrite\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">True\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_sqlite_tablefrom fairspec_metadata import SqliteFileDialect# Save table to SQLite databasesave_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( tableName="results",))# Overwrite existing tablesave_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( tableName="results",), overwrite=True)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-inference\">Schema Inference\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-inference\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath 
fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Inference”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Table schemas are automatically inferred from SQLite table definitions:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_sqlite_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Field types are automatically detected from database schema\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">shop.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">tableName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">products\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Types like INTEGER, TEXT, REAL are mapped to appropriate Table Schema 
types\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_sqlite_table, Resourcefrom fairspec_metadata import SqliteFileDialect# Field types are automatically detected from database schematable = load_sqlite_table(Resource( data="shop.db", fileDialect=SqliteFileDialect(tableName="products"),))# Types like INTEGER, TEXT, REAL are mapped to appropriate Table Schema types\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"creating-new-tables\">Creating New Tables\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#creating-new-tables\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Creating New Tables”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>When saving, the plugin automatically creates the table structure:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_sqlite_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Creates a new database file with the specified table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">new-database.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">tableName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">my_data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_sqlite_tablefrom fairspec_metadata import SqliteFileDialect# Creates a new database file with the specified tablesave_sqlite_table(table, path="new-database.db", fileDialect=SqliteFileDialect( tableName="my_data",))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"working-with-table-schema\">Working with Table Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#working-with-table-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Working with Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>You can provide a custom Table Schema when 
saving:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_sqlite_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> SqliteFileDialect, TableSchema, IntegerColumnProperty, StringColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_sqlite_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.db\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">SqliteFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableName\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">customers\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">tableSchema\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">properties\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_sqlite_tablefrom fairspec_metadata import SqliteFileDialect, TableSchema, IntegerColumnProperty, StringColumnPropertysave_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( tableName="customers",), tableSchema=TableSchema(properties={ "id": IntegerColumnProperty(), "name": StringColumnProperty(), "email": 
StringColumnProperty(),}))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":779,"localImagePaths":799,"remoteImagePaths":800,"frontmatter":801,"imagePaths":803},[780,781,782,783,786,789,790,793,796],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":784,"text":785},"loading-sqlite-tables","Loading SQLite Tables",{"depth":74,"slug":787,"text":788},"saving-sqlite-tables","Saving SQLite Tables",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":791,"text":792},"schema-inference","Schema Inference",{"depth":74,"slug":794,"text":795},"creating-new-tables","Creating New Tables",{"depth":74,"slug":797,"text":798},"working-with-table-schema","Working with Table Schema",[],[],{"title":767,"sidebar":802},{"label":771,"order":770},[],"python/table",{"id":804,"data":806,"body":813,"filePath":814,"digest":815,"rendered":816},{"title":807,"editUrl":15,"head":808,"template":17,"sidebar":809,"pagefind":15,"draft":21},"Working with Tabular Data in Python",[],{"order":810,"label":811,"hidden":21,"attrs":812},11,"Tabular Data",{},"High-performance data processing and schema validation for tabular data built on **Polars** (a Rust-based DataFrame library).\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe table package provides core utilities for working with tabular data:\n\n- `normalize_table` - Convert table data to match a schema\n- `denormalize_table` - Convert normalized data back to raw format\n- `infer_table_schema_from_table` - Automatically infer schema from table data\n- `inspect_table` - Get table structure information\n- `query_table` - Query tables using SQL-like syntax\n\nFor example:\n\n```python\nfrom fairspec import load_csv_table, infer_table_schema_from_table, Resource\n\ntable = load_csv_table(Resource(data=\"data.csv\"))\nschema = infer_table_schema_from_table(table)\n```\n\n## Basic Usage\n\n### Schema 
Inference\n\nAutomatically infer Table Schema from data:\n\n```python\nimport polars as pl\nfrom fairspec import infer_table_schema_from_table\n\ntable = pl.DataFrame({\n \"id\": [\"1\", \"2\", \"3\"],\n \"price\": [\"10.50\", \"25.00\", \"15.75\"],\n \"date\": [\"2023-01-15\", \"2023-02-20\", \"2023-03-25\"],\n \"active\": [\"true\", \"false\", \"true\"],\n}).lazy()\n\nschema = infer_table_schema_from_table(table, sample_rows=100, confidence=0.9)\n\n# Result: automatically detected integer, number, date, and boolean types\n```\n\n### Table Normalization\n\nConvert table data to match a Table Schema (type conversion):\n\n```python\nimport polars as pl\nfrom fairspec import normalize_table\nfrom fairspec_metadata import TableSchema, IntegerColumnProperty, NumberColumnProperty, BooleanColumnProperty, DateColumnProperty\n\ntable = pl.DataFrame({\n \"id\": [\"1\", \"2\", \"3\"],\n \"price\": [\"10.50\", \"25.00\", \"15.75\"],\n \"active\": [\"true\", \"false\", \"true\"],\n \"date\": [\"2023-01-15\", \"2023-02-20\", \"2023-03-25\"],\n}).lazy()\n\nschema = TableSchema(properties={\n \"id\": IntegerColumnProperty(),\n \"price\": NumberColumnProperty(),\n \"active\": BooleanColumnProperty(),\n \"date\": DateColumnProperty(),\n})\n\nnormalized = normalize_table(table, schema)\nresult = normalized.collect()\n\n# Result has properly typed columns:\n# { id: 1, price: 10.50, active: True, date: Date(\"2023-01-15\") }\n```\n\n### Table Denormalization\n\nConvert normalized data back to raw format (for saving):\n\n```python\nfrom fairspec import denormalize_table\n\ndenormalized = denormalize_table(table, schema, native_types=[\"string\", \"number\", \"boolean\"])\n```\n\n## Advanced Features\n\n### Working with Table Schema\n\nDefine schemas with column properties and constraints:\n\n```python\nfrom fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty\n\nschema = TableSchema(\n properties={\n \"id\": IntegerColumnProperty(minimum=1),\n \"name\": 
StringColumnProperty(minLength=1, maxLength=100),\n \"email\": StringColumnProperty(pattern=r\"^[^@]+@[^@]+\\.[^@]+$\"),\n \"age\": IntegerColumnProperty(minimum=0, maximum=150),\n \"status\": StringColumnProperty(enum=[\"active\", \"inactive\", \"pending\"]),\n },\n required=[\"id\", \"name\", \"email\"],\n primaryKey=[\"id\"],\n)\n```\n\n### Schema Inference Options\n\nCustomize how schemas are inferred:\n\n```python\nfrom fairspec import infer_table_schema_from_table\n\nschema = infer_table_schema_from_table(\n table,\n sample_rows=100,\n confidence=0.9,\n keep_strings=False,\n column_types={\"id\": \"integer\", \"status\": \"categorical\"},\n)\n```\n\n### Handling Missing Values\n\nDefine missing value indicators:\n\n```python\nfrom fairspec_metadata import TableSchema, NumberColumnProperty\n\nschema = TableSchema(\n properties={\"value\": NumberColumnProperty()},\n missingValues=[\"\", \"N/A\", \"null\", -999],\n)\n```\n\n### Primary Keys and Constraints\n\nDefine table-level constraints:\n\n```python\nfrom fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty, UniqueKey\n\nschema = TableSchema(\n properties={\n \"user_id\": IntegerColumnProperty(),\n \"email\": StringColumnProperty(),\n },\n primaryKey=[\"user_id\"],\n uniqueKeys=[UniqueKey(columnNames=[\"email\"])],\n)\n```\n\n## Supported Column Types\n\n### Primitive Types\n- `string` - Text data\n- `integer` - Whole numbers\n- `number` - Decimal numbers\n- `boolean` - True/false values\n\n### Temporal Types\n- `date` - Calendar dates\n- `datetime` - Date and time\n- `time` - Time of day\n- `duration` - Time spans\n\n### Spatial Types\n- `geojson` - GeoJSON geometries\n- `wkt` - Well-Known Text geometries\n- `wkb` - Well-Known Binary geometries\n\n### Complex Types\n- `array` - Fixed-length arrays\n- `list` - Variable-length lists\n- `object` - JSON objects\n\n### Specialized Types\n- `email` - Email addresses\n- `url` - URLs\n- `categorical` - Categorical data\n- `base64` - 
Base64 encoded data\n- `hex` - Hexadecimal data\n\n## Table Type\n\nThe package uses `LazyFrame` from Polars for efficient processing:\n\n```python\nimport polars as pl\nfrom fairspec_table import Table\n\n# Table is an alias for pl.LazyFrame\ntable: Table = pl.DataFrame({\"id\": [1, 2, 3]}).lazy()\n```","content/docs/python/table.md","ee7db826a252c0ef",{"html":817,"metadata":818},"\u003Cp>High-performance data processing and schema validation for tabular data built on \u003Cstrong>Polars\u003C/strong> (a Rust-based DataFrame library).\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The table package provides core utilities for working with tabular data:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">normalize_table\u003C/code> - Convert table data to match a schema\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">denormalize_table\u003C/code> - Convert normalized data back to raw format\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">infer_table_schema_from_table\u003C/code> - Automatically infer schema from table data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">inspect_table\u003C/code> - Get table structure 
information\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">query_table\u003C/code> - Query tables using SQL-like syntax\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, infer_table_schema_from_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.csv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">infer_table_schema_from_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, infer_table_schema_from_table, Resourcetable = load_csv_table(Resource(data="data.csv"))schema = infer_table_schema_from_table(table)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-inference\">Schema Inference\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-inference\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 
3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Inference”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Automatically infer Table Schema from data:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> polars \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">as\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> infer_table_schema_from_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">DataFrame\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> 
\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">price\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">10.50\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">25.00\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">15.75\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">date\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-01-15\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-02-20\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-03-25\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">true\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">false\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">true\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">lazy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">infer_table_schema_from_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sample_rows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">100\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan 
style=\"--0:#D7DBE0;--1:#403F53\">confidence\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0.9\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Result: automatically detected integer, number, date, and boolean types\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"import polars as plfrom fairspec import infer_table_schema_from_tabletable = pl.DataFrame({ "id": ["1", "2", "3"], "price": ["10.50", "25.00", "15.75"], "date": ["2023-01-15", "2023-02-20", "2023-03-25"], "active": ["true", "false", "true"],}).lazy()schema = infer_table_schema_from_table(table, sample_rows=100, confidence=0.9)# Result: automatically detected integer, number, date, and boolean types\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"table-normalization\">Table Normalization\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#table-normalization\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Table Normalization”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Convert table data to match a Table Schema (type conversion):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> polars \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">as\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> normalize_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, IntegerColumnProperty, NumberColumnProperty, BooleanColumnProperty, DateColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">DataFrame\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">price\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">10.50\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">25.00\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">15.75\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">true\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">false\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">true\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">date\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: 
\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-01-15\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-02-20\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">2023-03-25\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">lazy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">properties\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">price\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">NumberColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">BooleanColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">date\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">DateColumnProperty\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">normalized \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">normalize_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> schema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">result \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> normalized.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">collect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Result has properly typed columns:\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># { id: 1, price: 10.50, active: True, date: Date(\"2023-01-15\") }\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" 
data-copied=\"Copied!\" data-code=\"import polars as plfrom fairspec import normalize_tablefrom fairspec_metadata import TableSchema, IntegerColumnProperty, NumberColumnProperty, BooleanColumnProperty, DateColumnPropertytable = pl.DataFrame({ "id": ["1", "2", "3"], "price": ["10.50", "25.00", "15.75"], "active": ["true", "false", "true"], "date": ["2023-01-15", "2023-02-20", "2023-03-25"],}).lazy()schema = TableSchema(properties={ "id": IntegerColumnProperty(), "price": NumberColumnProperty(), "active": BooleanColumnProperty(), "date": DateColumnProperty(),})normalized = normalize_table(table, schema)result = normalized.collect()# Result has properly typed columns:# { id: 1, price: 10.50, active: True, date: Date("2023-01-15") }\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"table-denormalization\">Table Denormalization\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#table-denormalization\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Table Denormalization”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Convert normalized data back to raw format (for saving):\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre 
data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> denormalize_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">denormalized \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">denormalize_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> schema\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">native_types\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">string\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">number\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">boolean\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import denormalize_tabledenormalized = denormalize_table(table, schema, native_types=["string", "number", "boolean"])\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"working-with-table-schema\">Working with Table Schema\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#working-with-table-schema\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 
1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Working with Table Schema”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Define schemas with column properties and constraints:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, IntegerColumnProperty, StringColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">properties\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">minimum\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">minLength\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">maxLength\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">100\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">pattern\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">r\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#5CA7E4;--1:#3A6A90\">^\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">[\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">^\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">@]\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">+\u003C/span>\u003Cspan style=\"--0:#5CA7E4;--1:#3A6A90\">@\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">[\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">^\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">@]\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">+\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\.\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">[\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">^\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">@]\u003C/span>\u003Cspan style=\"--0:#7FDBCA;--1:#097174\">+\u003C/span>\u003Cspan style=\"--0:#5CA7E4;--1:#3A6A90\">$\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan 
style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">minimum\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">maximum\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">150\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">status\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">enum\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">active\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">inactive\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">pending\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">required\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">primaryKey\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnPropertyschema = TableSchema( properties={ "id": IntegerColumnProperty(minimum=1), "name": StringColumnProperty(minLength=1, maxLength=100), "email": StringColumnProperty(pattern=r"^[^@]+@[^@]+\\.[^@]+$"), "age": IntegerColumnProperty(minimum=0, maximum=150), "status": StringColumnProperty(enum=["active", "inactive", "pending"]), }, required=["id", "name", "email"], primaryKey=["id"],)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"schema-inference-options\">Schema Inference Options\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#schema-inference-options\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Schema Inference Options”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Customize how schemas are inferred:\u003C/p>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> infer_table_schema_from_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">infer_table_schema_from_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">sample_rows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">100\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">confidence\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">0.9\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">keep_strings\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#FF5874;--1:#A54A4A\">False\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">column_types\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">integer\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">status\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">categorical\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton 
title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import infer_table_schema_from_tableschema = infer_table_schema_from_table( table, sample_rows=100, confidence=0.9, keep_strings=False, column_types={"id": "integer", "status": "categorical"},)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"handling-missing-values\">Handling Missing Values\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#handling-missing-values\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Handling Missing Values”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Define missing value indicators:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, NumberColumnProperty\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">properties\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">value\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">NumberColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">missingValues\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">N/A\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">null\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, 
\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">-\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">999\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec_metadata import TableSchema, NumberColumnPropertyschema = TableSchema( properties={"value": NumberColumnProperty()}, missingValues=["", "N/A", "null", -999],)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"primary-keys-and-constraints\">Primary Keys and Constraints\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#primary-keys-and-constraints\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Primary Keys and Constraints”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>Define table-level constraints:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> TableSchema, IntegerColumnProperty, StringColumnProperty, UniqueKey\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">schema \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">TableSchema\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">properties\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">user_id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">IntegerColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: 
\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">StringColumnProperty\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">primaryKey\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">user_id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">uniqueKeys\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">UniqueKey\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">columnNames\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">email\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty, UniqueKeyschema = TableSchema( properties={ "user_id": IntegerColumnProperty(), "email": StringColumnProperty(), }, primaryKey=["user_id"], uniqueKeys=[UniqueKey(columnNames=["email"])],)\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"supported-column-types\">Supported Column Types\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#supported-column-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Supported Column Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"primitive-types\">Primitive Types\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#primitive-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 
2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Primitive Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">string\u003C/code> - Text data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">integer\u003C/code> - Whole numbers\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">number\u003C/code> - Decimal numbers\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">boolean\u003C/code> - True/false values\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"temporal-types\">Temporal Types\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#temporal-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Temporal Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">date\u003C/code> - Calendar dates\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">datetime\u003C/code> - Date and time\u003C/li>\n\u003Cli>\u003Ccode 
dir=\"auto\">time\u003C/code> - Time of day\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">duration\u003C/code> - Time spans\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"spatial-types\">Spatial Types\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#spatial-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Spatial Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">geojson\u003C/code> - GeoJSON geometries\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">wkt\u003C/code> - Well-Known Text geometries\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">wkb\u003C/code> - Well-Known Binary geometries\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"complex-types\">Complex Types\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#complex-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 
15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Complex Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">array\u003C/code> - Fixed-length arrays\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">list\u003C/code> - Variable-length lists\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">object\u003C/code> - JSON objects\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"specialized-types\">Specialized Types\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#specialized-types\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Specialized Types”\u003C/span>\u003C/a>\u003C/div>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">email\u003C/code> - Email addresses\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">url\u003C/code> - URLs\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">categorical\u003C/code> - Categorical data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">base64\u003C/code> - Base64 encoded data\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">hex\u003C/code> - Hexadecimal data\u003C/li>\n\u003C/ul>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"table-type\">Table Type\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#table-type\">\u003Cspan 
aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Table Type”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The package uses \u003Ccode dir=\"auto\">LazyFrame\u003C/code> from Polars for efficient processing:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> polars \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">as\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> Table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Table is an alias for pl.LazyFrame\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">table: Table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> pl.\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">DataFrame\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">{\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">id\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">: \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">}\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">).\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">lazy\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">()\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"import polars as plfrom fairspec_table import Table# Table is an alias for pl.LazyFrametable: Table = pl.DataFrame({"id": [1, 2, 
3]}).lazy()\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":819,"localImagePaths":860,"remoteImagePaths":861,"frontmatter":862,"imagePaths":864},[820,821,822,823,824,827,830,831,832,833,836,839,842,845,848,851,854,857],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":791,"text":792},{"depth":74,"slug":825,"text":826},"table-normalization","Table Normalization",{"depth":74,"slug":828,"text":829},"table-denormalization","Table Denormalization",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":797,"text":798},{"depth":74,"slug":257,"text":258},{"depth":74,"slug":834,"text":835},"handling-missing-values","Handling Missing Values",{"depth":74,"slug":837,"text":838},"primary-keys-and-constraints","Primary Keys and Constraints",{"depth":31,"slug":840,"text":841},"supported-column-types","Supported Column Types",{"depth":74,"slug":843,"text":844},"primitive-types","Primitive Types",{"depth":74,"slug":846,"text":847},"temporal-types","Temporal Types",{"depth":74,"slug":849,"text":850},"spatial-types","Spatial Types",{"depth":74,"slug":852,"text":853},"complex-types","Complex Types",{"depth":74,"slug":855,"text":856},"specialized-types","Specialized Types",{"depth":31,"slug":858,"text":859},"table-type","Table Type",[],[],{"title":807,"sidebar":863},{"label":811,"order":810},[],"python/tsv",{"id":865,"data":867,"body":873,"filePath":874,"digest":875,"rendered":876},{"title":868,"editUrl":15,"head":869,"template":17,"sidebar":870,"pagefind":15,"draft":21},"Working with TSV in Python",[],{"order":31,"label":871,"hidden":21,"attrs":872},"TSV",{},"Tab-separated values (TSV) file handling with automatic format detection and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe TSV format is handled by the CSV plugin, which provides:\n\n- `load_csv_table` - Load TSV files into tables\n- `save_csv_table` 
- Save tables to TSV files\n- `CsvPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_csv_table, Resource\n\ntable = load_csv_table(Resource(data=\"table.tsv\"))\n# the column types will be automatically inferred\n```\n\n## Basic Usage\n\n### Loading TSV Files\n\n```python\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\n# Load a simple TSV file\ntable = load_csv_table(Resource(data=\"data.tsv\"))\n\n# Load with explicit format\ntable = load_csv_table(Resource(\n data=\"data.tsv\",\n fileDialect=CsvFileDialect(delimiter=\"\\t\", headerRows=[1]),\n))\n\n# Load multiple TSV files (concatenated)\ntable = load_csv_table(Resource(data=[\"part1.tsv\", \"part2.tsv\", \"part3.tsv\"]))\n```\n\n### Saving TSV Files\n\n```python\nfrom fairspec import save_csv_table\nfrom fairspec_metadata import CsvFileDialect\n\n# Save with default options\nsave_csv_table(table, path=\"output.tsv\", fileDialect=CsvFileDialect(delimiter=\"\\t\"))\n\n# Save with line terminator option\nsave_csv_table(table, path=\"output.tsv\", fileDialect=CsvFileDialect(\n delimiter=\"\\t\",\n lineTerminator=\"\\r\\n\",\n))\n```\n\n## Advanced Features\n\n### Multi-Header Row Processing\n\n```python\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\n# TSV with multiple header rows\ntable = load_csv_table(Resource(\n data=\"multi-header.tsv\",\n fileDialect=CsvFileDialect(\n delimiter=\"\\t\",\n headerRows=[1, 2],\n headerJoin=\"_\",\n ),\n))\n```\n\n### Comment Handling\n\n```python\nfrom fairspec import load_csv_table, Resource\nfrom fairspec_metadata import CsvFileDialect\n\n# TSV with comment lines\ntable = load_csv_table(Resource(\n data=\"with-comments.tsv\",\n fileDialect=CsvFileDialect(\n delimiter=\"\\t\",\n commentPrefix=\"#\",\n headerRows=[1],\n ),\n))\n\n# Or specify specific comment row numbers\ntable = load_csv_table(Resource(\n data=\"with-comments.tsv\",\n 
fileDialect=CsvFileDialect(\n delimiter=\"\\t\",\n commentRows=[1, 2],\n headerRows=[3],\n ),\n))\n```\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_csv_table, Resource\n\n# Load from URL\ntable = load_csv_table(Resource(data=\"https://example.com/data.tsv\"))\n\n# Load multiple remote files\ntable = load_csv_table(Resource(data=[\n \"https://api.example.com/data-2023.tsv\",\n \"https://api.example.com/data-2024.tsv\",\n]))\n```","content/docs/python/tsv.md","497f374d979ba013",{"html":877,"metadata":878},"\u003Cp>Tab-separated values (TSV) file handling with automatic format detection and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The TSV format is handled by the CSV plugin, which provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_csv_table\u003C/code> - Load TSV files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_csv_table\u003C/code> - Save tables to TSV files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">CsvPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv 
class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># the column types will be automatically inferred\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcetable = load_csv_table(Resource(data="table.tsv"))# the column types will be automatically 
inferred\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-tsv-files\">Loading TSV Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-tsv-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading TSV Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption 
class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a simple TSV file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv 
class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load with explicit format\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">delimiter\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple TSV files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part1.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part2.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">part3.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialect# Load a simple TSV filetable = load_csv_table(Resource(data="data.tsv"))# Load with explicit formattable = load_csv_table(Resource( data="data.tsv", fileDialect=CsvFileDialect(delimiter="\\t", headerRows=[1]),))# Load multiple TSV files (concatenated)table = load_csv_table(Resource(data=["part1.tsv", "part2.tsv", "part3.tsv"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-tsv-files\">Saving TSV Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-tsv-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving TSV Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre 
data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_csv_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan 
style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">delimiter\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with line terminator option\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">lineTerminator\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\r\\n\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_csv_tablefrom fairspec_metadata import CsvFileDialect# Save with default optionssave_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect(delimiter="\\t"))# Save with line terminator optionsave_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect( delimiter="\\t", lineTerminator="\\r\\n",))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 
1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"multi-header-row-processing\">Multi-Header Row Processing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#multi-header-row-processing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Multi-Header Row Processing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> 
fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># TSV with multiple header rows\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">multi-header.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan 
style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerJoin\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">_\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialect# TSV with multiple header rowstable = load_csv_table(Resource( 
data="multi-header.tsv", fileDialect=CsvFileDialect( delimiter="\\t", headerRows=[1, 2], headerJoin="_", ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"comment-handling\">Comment Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#comment-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Comment Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> CsvFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># TSV with comment lines\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentPrefix\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">#\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Or specify specific comment row numbers\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">CsvFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">delimiter\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">\\t\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resourcefrom fairspec_metadata import CsvFileDialect# TSV with comment linestable = load_csv_table(Resource( data="with-comments.tsv", fileDialect=CsvFileDialect( delimiter="\\t", commentPrefix="#", headerRows=[1], ),))# Or specify specific comment row numberstable = load_csv_table(Resource( data="with-comments.tsv", fileDialect=CsvFileDialect( delimiter="\\t", commentRows=[1, 2], headerRows=[3], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 
6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_csv_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.tsv\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_csv_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.tsv\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_csv_table, Resource# Load from URLtable = load_csv_table(Resource(data="https://example.com/data.tsv"))# Load multiple remote filestable = load_csv_table(Resource(data=[ "https://api.example.com/data-2023.tsv", "https://api.example.com/data-2024.tsv",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":879,"localImagePaths":895,"remoteImagePaths":896,"frontmatter":897,"imagePaths":899},[880,881,882,883,886,889,890,891,894],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":884,"text":885},"loading-tsv-files","Loading TSV Files",{"depth":74,"slug":887,"text":888},"saving-tsv-files","Saving TSV Files",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":534,"text":535},{"depth":74,"slug":892,"text":893},"comment-handling","Comment Handling",{"depth":74,"slug":498,"text":499},[],[],{"title":868,"sidebar":898},{"label":871,"order":31},[],"python/xlsx",{"id":900,"data":902,"body":909,"filePath":910,"digest":911,"rendered":912},{"title":903,"editUrl":15,"head":904,"template":17,"sidebar":905,"pagefind":15,"draft":21},"Working with XLSX in Python",[],{"order":906,"label":907,"hidden":21,"attrs":908},5,"XLSX",{},"Excel (.xlsx) file handling with sheet selection, advanced header processing, and high-performance data operations.\n\n## Installation\n\n```bash\npip install fairspec\n```\n\n## Getting Started\n\nThe XLSX plugin provides:\n\n- `load_xlsx_table` - Load Excel files into tables\n- `save_xlsx_table` - Save tables to Excel files\n- `XlsxPlugin` - Plugin for framework integration\n\nFor example:\n\n```python\nfrom fairspec import load_xlsx_table, Resource\n\ntable = 
load_xlsx_table(Resource(data=\"table.xlsx\"))\n# the column types will be automatically inferred\n```\n\n## Basic Usage\n\n### Loading XLSX Files\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Load a simple XLSX file\ntable = load_xlsx_table(Resource(data=\"data.xlsx\"))\n\n# Load with custom format (specify sheet)\ntable = load_xlsx_table(Resource(\n data=\"data.xlsx\",\n fileDialect=XlsxFileDialect(sheetName=\"Sheet2\"),\n))\n\n# Load multiple XLSX files (concatenated)\ntable = load_xlsx_table(Resource(data=[\"part1.xlsx\", \"part2.xlsx\", \"part3.xlsx\"]))\n```\n\n### Saving XLSX Files\n\n```python\nfrom fairspec import save_xlsx_table\nfrom fairspec_metadata import XlsxFileDialect\n\n# Save with default options\nsave_xlsx_table(table, path=\"output.xlsx\")\n\n# Save with custom sheet name\nsave_xlsx_table(table, path=\"output.xlsx\", fileDialect=XlsxFileDialect(sheetName=\"Data\"))\n```\n\n## Advanced Features\n\n### Sheet Selection\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Select by sheet number (1-indexed)\ntable = load_xlsx_table(Resource(\n data=\"workbook.xlsx\",\n fileDialect=XlsxFileDialect(sheetNumber=2),\n))\n\n# Select by sheet name\ntable = load_xlsx_table(Resource(\n data=\"workbook.xlsx\",\n fileDialect=XlsxFileDialect(sheetName=\"Sales Data\"),\n))\n```\n\n### Multi-Header Row Processing\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# XLSX with multiple header rows\ntable = load_xlsx_table(Resource(\n data=\"multi-header.xlsx\",\n fileDialect=XlsxFileDialect(\n headerRows=[1, 2],\n headerJoin=\"_\",\n ),\n))\n# Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\n```\n\n### Comment Row Handling\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Skip 
specific comment rows\ntable = load_xlsx_table(Resource(\n data=\"with-comments.xlsx\",\n fileDialect=XlsxFileDialect(\n commentRows=[1, 2],\n headerRows=[3],\n ),\n))\n\n# Skip rows with comment prefix\ntable = load_xlsx_table(Resource(\n data=\"data.xlsx\",\n fileDialect=XlsxFileDialect(\n commentPrefix=\"#\",\n headerRows=[1],\n ),\n))\n```\n\n### Remote File Loading\n\n```python\nfrom fairspec import load_xlsx_table, Resource\n\n# Load from URL\ntable = load_xlsx_table(Resource(data=\"https://example.com/data.xlsx\"))\n\n# Load multiple remote files\ntable = load_xlsx_table(Resource(data=[\n \"https://api.example.com/data-2023.xlsx\",\n \"https://api.example.com/data-2024.xlsx\",\n]))\n```\n\n### Column Selection\n\n```python\nfrom fairspec import load_xlsx_table, Resource\nfrom fairspec_metadata import XlsxFileDialect\n\n# Select specific columns\ntable = load_xlsx_table(Resource(\n data=\"data.xlsx\",\n fileDialect=XlsxFileDialect(columnNames=[\"name\", \"age\", \"city\"]),\n))\n```","content/docs/python/xlsx.md","070db384a59abcb8",{"html":913,"metadata":914},"\u003Cp>Excel (.xlsx) file handling with sheet selection, advanced header processing, and high-performance data operations.\u003C/p>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"installation\">Installation\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#installation\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Installation”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Clink rel=\"stylesheet\" href=\"/_astro/ec.8ol6u.css\">\u003Cscript type=\"module\" src=\"/_astro/ec.0vx5m.js\">\u003C/script>\u003Cfigure class=\"frame is-terminal not-content\">\u003Cfigcaption class=\"header\">\u003Cspan class=\"title\">\u003C/span>\u003Cspan class=\"sr-only\">Terminal window\u003C/span>\u003C/figcaption>\u003Cpre data-language=\"bash\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">pip\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">install\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#3C63B3\">fairspec\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"pip install fairspec\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"getting-started\">Getting Started\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#getting-started\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 
1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Getting Started”\u003C/span>\u003C/a>\u003C/div>\n\u003Cp>The XLSX plugin provides:\u003C/p>\n\u003Cul>\n\u003Cli>\u003Ccode dir=\"auto\">load_xlsx_table\u003C/code> - Load Excel files into tables\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">save_xlsx_table\u003C/code> - Save tables to Excel files\u003C/li>\n\u003Cli>\u003Ccode dir=\"auto\">XlsxPlugin\u003C/code> - Plugin for framework integration\u003C/li>\n\u003C/ul>\n\u003Cp>For example:\u003C/p>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">table.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># the column types will be automatically inferred\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcetable = load_xlsx_table(Resource(data="table.xlsx"))# the column types will be automatically inferred\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"basic-usage\">Basic Usage\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#basic-usage\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Basic Usage”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"loading-xlsx-files\">Loading XLSX Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#loading-xlsx-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 
1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Loading XLSX Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load a simple XLSX file\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan 
style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load with custom format (specify sheet)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan 
style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">sheetName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Sheet2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple XLSX files (concatenated)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part1.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part2.xlsx\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">part3.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Load a simple XLSX filetable = load_xlsx_table(Resource(data="data.xlsx"))# Load with custom format (specify sheet)table = load_xlsx_table(Resource( data="data.xlsx", fileDialect=XlsxFileDialect(sheetName="Sheet2"),))# Load multiple XLSX files (concatenated)table = load_xlsx_table(Resource(data=["part1.xlsx", "part2.xlsx", "part3.xlsx"]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"saving-xlsx-files\">Saving XLSX Files\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#saving-xlsx-files\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Saving XLSX 
Files”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> save_xlsx_table\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with default options\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Save with custom sheet name\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">save_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">table\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">path\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">output.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">sheetName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import save_xlsx_tablefrom fairspec_metadata import XlsxFileDialect# Save with default optionssave_xlsx_table(table, path="output.xlsx")# Save with custom sheet namesave_xlsx_table(table, path="output.xlsx", 
fileDialect=XlsxFileDialect(sheetName="Data"))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h2\">\u003Ch2 id=\"advanced-features\">Advanced Features\u003C/h2>\u003Ca class=\"sl-anchor-link\" href=\"#advanced-features\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Advanced Features”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"sheet-selection\">Sheet Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#sheet-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Sheet Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame 
not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Select by sheet number (1-indexed)\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">workbook.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">sheetNumber\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Select by sheet name\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">workbook.xlsx\u003C/span>\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">sheetName\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">Sales Data\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Select by sheet number (1-indexed)table = load_xlsx_table(Resource( data="workbook.xlsx", fileDialect=XlsxFileDialect(sheetNumber=2),))# Select by sheet nametable = load_xlsx_table(Resource( data="workbook.xlsx", fileDialect=XlsxFileDialect(sheetName="Sales Data"),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"multi-header-row-processing\">Multi-Header Row Processing\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#multi-header-row-processing\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 
24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Multi-Header Row Processing”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># XLSX with multiple header rows\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan 
style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">multi-header.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerJoin\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">_\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Resulting columns: [\"Year_Quarter\", \"2023_Q1\", \"2023_Q2\", \"2024_Q1\", \"2024_Q2\"]\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# XLSX with multiple header rowstable = load_xlsx_table(Resource( data="multi-header.xlsx", fileDialect=XlsxFileDialect( headerRows=[1, 2], headerJoin="_", ),))# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"]\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"comment-row-handling\">Comment Row Handling\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#comment-row-handling\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 
1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Comment Row Handling”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Skip specific comment rows\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> 
\u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">with-comments.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">2\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">3\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv 
class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Skip rows with comment prefix\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">commentPrefix\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">#\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">headerRows\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#F78C6C;--1:#AA0982\">1\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">],\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Skip specific comment rowstable = load_xlsx_table(Resource( data="with-comments.xlsx", fileDialect=XlsxFileDialect( commentRows=[1, 2], headerRows=[3], ),))# Skip rows with comment prefixtable = load_xlsx_table(Resource( data="data.xlsx", fileDialect=XlsxFileDialect( commentPrefix="#", headerRows=[1], ),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"remote-file-loading\">Remote File Loading\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#remote-file-loading\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 
1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Remote File Loading”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load from URL\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan 
style=\"--0:#ECC48D;--1:#9B504E\">https://example.com/data.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Load multiple remote files\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">data\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2023.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">https://api.example.com/data-2024.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan 
style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resource# Load from URLtable = load_xlsx_table(Resource(data="https://example.com/data.xlsx"))# Load multiple remote filestable = load_xlsx_table(Resource(data=[ "https://api.example.com/data-2023.xlsx", "https://api.example.com/data-2024.xlsx",]))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>\n\u003Cdiv class=\"sl-heading-wrapper level-h3\">\u003Ch3 id=\"column-selection\">Column Selection\u003C/h3>\u003Ca class=\"sl-anchor-link\" href=\"#column-selection\">\u003Cspan aria-hidden=\"true\" class=\"sl-anchor-icon\">\u003Csvg width=\"16\" height=\"16\" viewBox=\"0 0 24 24\">\u003Cpath fill=\"currentcolor\" d=\"m12.11 15.39-3.88 3.88a2.52 2.52 0 0 1-3.5 0 2.47 2.47 0 0 1 0-3.5l3.88-3.88a1 1 0 0 0-1.42-1.42l-3.88 3.89a4.48 4.48 0 0 0 6.33 6.33l3.89-3.88a1 1 0 1 0-1.42-1.42Zm8.58-12.08a4.49 4.49 0 0 0-6.33 0l-3.89 3.88a1 1 0 0 0 1.42 1.42l3.88-3.88a2.52 2.52 0 0 1 3.5 0 2.47 2.47 0 0 1 0 3.5l-3.88 3.88a1 1 0 1 0 1.42 1.42l3.88-3.89a4.49 4.49 0 0 0 0-6.33ZM8.83 15.17a1 1 0 0 0 1.1.22 1 1 0 0 0 .32-.22l4.92-4.92a1 1 0 0 0-1.42-1.42l-4.92 4.92a1 1 0 0 0 0 1.42Z\">\u003C/path>\u003C/svg>\u003C/span>\u003Cspan class=\"sr-only\">Section titled “Column Selection”\u003C/span>\u003C/a>\u003C/div>\n\u003Cdiv class=\"expressive-code\">\u003Cfigure class=\"frame not-content\">\u003Cfigcaption class=\"header\">\u003C/figcaption>\u003Cpre data-language=\"python\">\u003Ccode>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan 
style=\"--0:#D6DEEB;--1:#403F53\"> load_xlsx_table, Resource\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">from\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> fairspec_metadata \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">import\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> XlsxFileDialect\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\n\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#809191;--1:#616671\"># Select specific columns\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">table \u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\"> \u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">load_xlsx_table\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">Resource\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">(\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">data\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">data.xlsx\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan class=\"indent\"> \u003C/span>\u003Cspan style=\"--0:#D7DBE0;--1:#403F53\">fileDialect\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#B2CCD6;--1:#097174\">XlsxFileDialect\u003C/span>\u003Cspan style=\"--1:#403F53\">\u003Cspan 
style=\"--0:#D6DEEB\">(\u003C/span>\u003Cspan style=\"--0:#D7DBE0\">columnNames\u003C/span>\u003C/span>\u003Cspan style=\"--0:#C792EA;--1:#8D46B4\">=\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">[\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">name\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">age\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#82AAFF;--1:#3C63B3\">, \u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#ECC48D;--1:#9B504E\">city\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">\"\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">]\u003C/span>\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">)\u003C/span>\u003Cspan style=\"--0:#D9F5DD;--1:#111111\">,\u003C/span>\u003C/div>\u003C/div>\u003Cdiv class=\"ec-line\">\u003Cdiv class=\"code\">\u003Cspan style=\"--0:#D6DEEB;--1:#403F53\">))\u003C/span>\u003C/div>\u003C/div>\u003C/code>\u003C/pre>\u003Cdiv class=\"copy\">\u003Cdiv aria-live=\"polite\">\u003C/div>\u003Cbutton title=\"Copy to clipboard\" data-copied=\"Copied!\" data-code=\"from fairspec import load_xlsx_table, Resourcefrom fairspec_metadata import XlsxFileDialect# Select specific columnstable = load_xlsx_table(Resource( data="data.xlsx", fileDialect=XlsxFileDialect(columnNames=["name", "age", "city"]),))\">\u003Cdiv>\u003C/div>\u003C/button>\u003C/div>\u003C/figure>\u003C/div>",{"headings":915,"localImagePaths":931,"remoteImagePaths":932,"frontmatter":933,"imagePaths":935},[916,917,918,919,922,925,926,927,928,929,930],{"depth":31,"slug":35,"text":36},{"depth":31,"slug":484,"text":20},{"depth":31,"slug":486,"text":487},{"depth":74,"slug":920,"text":921},"loading-xlsx-files","Loading XLSX 
Files",{"depth":74,"slug":923,"text":924},"saving-xlsx-files","Saving XLSX Files",{"depth":31,"slug":495,"text":496},{"depth":74,"slug":721,"text":722},{"depth":74,"slug":534,"text":535},{"depth":74,"slug":537,"text":538},{"depth":74,"slug":498,"text":499},{"depth":74,"slug":617,"text":618},[],[],{"title":903,"sidebar":934},{"label":907,"order":906},[],"meta:changelogs",["Map",938,939],"etag","\"74d1db57f0fec3481bb6b4d9bcdc020d656635ec6c53ee589d27a8831cbbe280\""] \ No newline at end of file diff --git a/website/.astro/integrations/_astrojs_starlight/i18n-plugins.d.ts b/website/.astro/integrations/_astrojs_starlight/i18n-plugins.d.ts new file mode 100644 index 0000000..e5ccc4a --- /dev/null +++ b/website/.astro/integrations/_astrojs_starlight/i18n-plugins.d.ts @@ -0,0 +1,14 @@ +declare namespace StarlightApp { + type PluginUIStringKeys = { + 'starlightChangelogs.compare.label': string; + 'starlightChangelogs.compare.title': string; + 'starlightChangelogs.pagination.next': string; + 'starlightChangelogs.pagination.prev': string; + 'starlightChangelogs.version.date': string; + 'starlightChangelogs.version.find': string; + 'starlightChangelogs.version.open': string; + 'starlightChangelogs.version.title': string; + 'starlightChangelogs.versions.all': string; + }; + interface I18n extends PluginUIStringKeys {} +} \ No newline at end of file diff --git a/website/.astro/settings.json b/website/.astro/settings.json new file mode 100644 index 0000000..271de06 --- /dev/null +++ b/website/.astro/settings.json @@ -0,0 +1,5 @@ +{ + "_variables": { + "lastUpdateCheck": 1770892022056 + } +} \ No newline at end of file diff --git a/website/.astro/starlight-changelogs.json b/website/.astro/starlight-changelogs.json new file mode 100644 index 0000000..933c05d --- /dev/null +++ b/website/.astro/starlight-changelogs.json @@ -0,0 +1 @@ +[{"base":"changelog","pagefind":true,"pageSize":10,"title":"Changelog"}] \ No newline at end of file diff --git a/website/.astro/types.d.ts 
b/website/.astro/types.d.ts new file mode 100644 index 0000000..63784aa --- /dev/null +++ b/website/.astro/types.d.ts @@ -0,0 +1,3 @@ +/// +/// +/// \ No newline at end of file diff --git a/website/assets/fairspec-logo.svg b/website/assets/fairspec-logo.svg new file mode 100644 index 0000000..a207b8e --- /dev/null +++ b/website/assets/fairspec-logo.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + diff --git a/website/astro.config.ts b/website/astro.config.ts new file mode 100644 index 0000000..87e4912 --- /dev/null +++ b/website/astro.config.ts @@ -0,0 +1,84 @@ +import starlight from "@astrojs/starlight"; +import { defineConfig } from "astro/config"; +import starlightChangelogs, { + makeChangelogsSidebarLinks, +} from "starlight-changelogs"; +import starlightGitHubAlerts from "starlight-github-alerts"; +import starlightScrollToTop from "starlight-scroll-to-top"; +import packageJson from "./package.json" with { type: "json" }; + +const { origin, hostname, pathname } = new URL(packageJson.homepage); +const basedir = import.meta.env.PROD ? 
pathname : "/"; + +export default defineConfig({ + site: origin, + base: basedir, + srcDir: ".", + outDir: "build", + integrations: [ + starlight({ + title: packageJson.title, + description: packageJson.description, + customCss: ["/styles/general.css"], + components: { + SocialIcons: "./components/builtin/SocialIcons.astro", + }, + logo: { + src: "/assets/fairspec-logo.svg", + alt: "Fairspec Logo", + }, + social: [ + { + icon: "github", + label: "GitHub", + href: "https://github.com/fairspec", + }, + ], + favicon: "fairspec-logo.png", + editLink: { + baseUrl: `${packageJson.repository}/edit/main`, + }, + lastUpdated: true, + tableOfContents: { minHeadingLevel: 2, maxHeadingLevel: 4 }, + expressiveCode: { + themes: ["starlight-dark", "starlight-light"], + }, + plugins: [ + starlightGitHubAlerts(), + starlightScrollToTop(), + starlightChangelogs(), + ], + sidebar: [ + { + label: "Overview", + items: [ + { label: "Getting Started", slug: "index" }, + { label: "Contributing", slug: "overview/contributing" }, + ], + }, + { label: "Terminal", autogenerate: { directory: "terminal" } }, + { label: "Python", autogenerate: { directory: "python" } }, + { + label: "Changelog", + items: makeChangelogsSidebarLinks([ + { + type: "recent", + base: "changelog", + count: 1, + }, + ]), + }, + ], + head: [ + { + tag: "script", + attrs: { + src: "https://plausible.io/js/script.js", + "data-domain": hostname.split(".").slice(-2).join("."), + defer: true, + }, + }, + ], + }), + ], +}); diff --git a/website/components/builtin/SocialIcons.astro b/website/components/builtin/SocialIcons.astro new file mode 100644 index 0000000..f0ac301 --- /dev/null +++ b/website/components/builtin/SocialIcons.astro @@ -0,0 +1,39 @@ +--- +import Default from "@astrojs/starlight/components/SocialIcons.astro" +--- + + + Home + + + + Application + + + + Standard + + + + TypeScript + + + + Python + + + + + + diff --git a/website/content.config.ts b/website/content.config.ts new file mode 100644 index 
0000000..ab73f5d --- /dev/null +++ b/website/content.config.ts @@ -0,0 +1,23 @@ +import { defineCollection } from "astro:content" +import { docsLoader } from "@astrojs/starlight/loaders" +import { docsSchema } from "@astrojs/starlight/schema" +import { changelogsLoader } from "starlight-changelogs/loader" +import packageJson from "./package.json" with { type: "json" } + +const [owner, repo] = new URL(packageJson.repository).pathname + .split("/") + .slice(1) + +export const collections = { + docs: defineCollection({ loader: docsLoader(), schema: docsSchema() }), + changelogs: defineCollection({ + loader: changelogsLoader([ + { + base: "changelog", + provider: "github", + owner, + repo, + }, + ]), + }), +} diff --git a/website/content/docs/index.md b/website/content/docs/index.md new file mode 100644 index 0000000..f2e7939 --- /dev/null +++ b/website/content/docs/index.md @@ -0,0 +1,112 @@ +--- +title: Fairspec Python +sidebar: + order: 1 + label: Getting Started +--- + +This guide will help you get started with Fairspec Python. If you are new to the core framework's technologies, please take a look at the [Fairspec standard](https://fairspec.org/) and [Polars DataFrames](https://pola.rs/) documentation. + +## Runtimes + +> [!TIP] +> - It is possible to use Fairspec Python in [Jupyter Notebooks](/python/jupyter)! + +Fairspec Python requires: + +- **Python 3.12+** + +## Installation + +The framework can be installed as one package: + +```bash +pip install fairspec +``` + +You can cherry-pick from individual packages: + +```bash +pip install fairspec-metadata fairspec-table +``` + +## Type Hints + +Fairspec Python is built with type safety in mind. It uses Python type hints to provide type definitions for all packages and to enforce type safety throughout the framework. It's highly recommended to use a type-aware editor such as VS Code with Pylance or PyCharm to work with the project. 
+ +## Examples + +Loading a Dataset from Zenodo merging system Zenodo metadata into a user dataset and validating its metadata: + +```python +from fairspec import load_dataset + +dataset = load_dataset("https://zenodo.org/records/10053903") + +print(dataset) +# { +# "id": "https://doi.org/10.5281/zenodo.10053903", +# ... +# } +``` + +Validating an in-memory dataset descriptor: + +```python +from fairspec import validate_dataset + +report = validate_dataset({"resources": "bad"}) + +print(report.valid) +# False +print(report.errors) +# [ +# { +# "type": "metadata", +# "message": "must have type array", +# "jsonPointer": "/resources", +# } +# ] +``` + +Loading a dataset from a remote descriptor and saving it locally as a zip archive, and then using it as a local dataset: + +```python +from fairspec import ( + load_dataset, + load_dataset_from_zip, + save_dataset_to_zip, + get_temp_file_path, +) + +archive_path = get_temp_file_path() +source_dataset = load_dataset( + "https://raw.githubusercontent.com/roll/currency-codes/refs/heads/master/datapackage.json", +) + +save_dataset_to_zip(source_dataset, archive_path=archive_path) +target_dataset = load_dataset_from_zip(archive_path) +print(target_dataset) +``` + +Reading a CSV table: + +```python +from fairspec import load_table, Resource +from fairspec_metadata import CsvFileDialect + +table = load_table(Resource(data="data.csv")) + +# Load with custom format +table = load_table(Resource( + data="data.csv", + fileDialect=CsvFileDialect( + delimiter=";", + headerRows=[1], + ), +)) +``` + +## Reference + +Note that `fairspec` and `fairspec-library` packages re-export most of the functionality. 
diff --git a/website/content/docs/overview/contributing.md b/website/content/docs/overview/contributing.md new file mode 120000 index 0000000..069558f --- /dev/null +++ b/website/content/docs/overview/contributing.md @@ -0,0 +1 @@ +../../../../CONTRIBUTING.md \ No newline at end of file diff --git a/website/content/docs/python/arrow.md b/website/content/docs/python/arrow.md new file mode 100644 index 0000000..bb83d51 --- /dev/null +++ b/website/content/docs/python/arrow.md @@ -0,0 +1,74 @@ +--- +title: Working with Arrow in Python +sidebar: + label: Arrow + order: 7 +--- + +Apache Arrow IPC file handling with high-performance columnar data processing. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The Arrow plugin provides: + +- `load_arrow_table` - Load Arrow IPC files into tables +- `save_arrow_table` - Save tables to Arrow IPC files +- `ArrowPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_arrow_table, Resource + +table = load_arrow_table(Resource(data="table.arrow")) +# High-performance columnar format +``` + +## Basic Usage + +### Loading Arrow Files + +```python +from fairspec import load_arrow_table, Resource + +# Load from local file +table = load_arrow_table(Resource(data="data.arrow")) + +# Load from remote URL +table = load_arrow_table(Resource(data="https://example.com/data.arrow")) + +# Load multiple files (concatenated) +table = load_arrow_table(Resource(data=["file1.arrow", "file2.arrow"])) +``` + +### Saving Arrow Files + +```python +from fairspec import save_arrow_table + +# Save with default options +save_arrow_table(table, path="output.arrow") +``` + +## Advanced Features + +### Remote File Loading + +```python +from fairspec import load_arrow_table, Resource + +# Load from URL +table = load_arrow_table(Resource(data="https://example.com/data.arrow")) + +# Load multiple remote files +table = load_arrow_table(Resource(data=[ + 
"https://api.example.com/data-2023.arrow", + "https://api.example.com/data-2024.arrow", +])) +``` diff --git a/website/content/docs/python/assets/jupyter.png b/website/content/docs/python/assets/jupyter.png new file mode 100644 index 0000000..5fd6bed Binary files /dev/null and b/website/content/docs/python/assets/jupyter.png differ diff --git a/website/content/docs/python/csv.md b/website/content/docs/python/csv.md new file mode 100644 index 0000000..5f52f71 --- /dev/null +++ b/website/content/docs/python/csv.md @@ -0,0 +1,163 @@ +--- +title: Working with CSV in Python +sidebar: + label: CSV + order: 1 +--- +Comprehensive CSV file handling with automatic format detection, advanced header processing, and high-performance data operations. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The CSV plugin provides these capabilities: + +- `load_csv_table` - Load CSV/TSV files into tables +- `save_csv_table` - Save tables to CSV/TSV files +- `CsvPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_csv_table, Resource + +table = load_csv_table(Resource(data="table.csv")) +# the column types will be automatically inferred +# or you can provide a Table Schema +``` + +## Basic Usage + +### Loading CSV Files + +```python +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +# Load a simple CSV file +table = load_csv_table(Resource(data="data.csv")) + +# Load with custom format +table = load_csv_table(Resource( + data="data.csv", + fileDialect=CsvFileDialect( + delimiter=";", + headerRows=[1], + ), +)) + +# Load multiple CSV files (concatenated) +table = load_csv_table(Resource(data=["part1.csv", "part2.csv", "part3.csv"])) +``` + +### Saving CSV Files + +```python +from fairspec import save_csv_table +from fairspec_metadata import CsvFileDialect + +# Save with default options +save_csv_table(table, path="output.csv") + +# Save with custom format 
+save_csv_table(table, path="output.csv", fileDialect=CsvFileDialect( + delimiter="\t", + quoteChar="'", +)) + +# Save as TSV +save_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect(delimiter="\t")) +``` + +### Automatic Format Detection + +```python +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +# Format is automatically detected when not specified +table = load_csv_table(Resource(data="unknown-dialect.csv")) +# The CSV plugin will automatically infer delimiter, quote characters, etc. + +# You can also explicitly specify the format if detection isn't accurate +table = load_csv_table(Resource( + data="data.csv", + fileDialect=CsvFileDialect( + delimiter=",", + quoteChar='"', + headerRows=[1], + ), +)) +``` + +## Advanced Features + +### Multi-Header Row Processing + +```python +# CSV with multiple header rows: +# Year,2023,2023,2024,2024 +# Quarter,Q1,Q2,Q1,Q2 +# Revenue,100,120,110,130 + +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +table = load_csv_table(Resource( + data="multi-header.csv", + fileDialect=CsvFileDialect( + headerRows=[1, 2], + headerJoin="_", + ), +)) +# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"] +``` + +### Comment Row Handling + +```python +# CSV with comment rows: +# # This is a comment +# # Generated on 2024-01-01 +# Name,Age,City +# John,25,NYC + +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +table = load_csv_table(Resource( + data="with-comments.csv", + fileDialect=CsvFileDialect( + commentRows=[1, 2], + headerRows=[3], + ), +)) + +# Or use commentPrefix to skip lines starting with a specific character +table = load_csv_table(Resource( + data="with-comments.csv", + fileDialect=CsvFileDialect( + commentPrefix="#", + headerRows=[1], + ), +)) +``` + +### Remote File Loading + +```python +from fairspec import load_csv_table, Resource + +# Load from 
URL +table = load_csv_table(Resource(data="https://example.com/data.csv")) + +# Load multiple remote files +table = load_csv_table(Resource(data=[ + "https://api.example.com/data-2023.csv", + "https://api.example.com/data-2024.csv", +])) +``` diff --git a/website/content/docs/python/inline.md b/website/content/docs/python/inline.md new file mode 100644 index 0000000..08b54c5 --- /dev/null +++ b/website/content/docs/python/inline.md @@ -0,0 +1,130 @@ +--- +title: Working with Inline Data tables in Python +sidebar: + label: Inline Data + order: 10 +--- + +Inline data handling for tables embedded directly in resource definitions. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The Inline plugin provides: + +- `load_inline_table` - Load tables from inline data +- `InlinePlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_inline_table, Resource + +table = load_inline_table(Resource(data=[ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, +])) +``` + +## Basic Usage + +### Object Format Data + +The most common format is an array of objects: + +```python +from fairspec import load_inline_table, Resource + +table = load_inline_table(Resource(data=[ + {"id": 1, "name": "english", "native": "English"}, + {"id": 2, "name": "chinese", "native": "\u4e2d\u6587"}, + {"id": 3, "name": "spanish", "native": "Espa\u00f1ol"}, +])) +``` + +### Array Format Data + +You can also use array-of-arrays format with the first row as headers: + +```python +from fairspec import load_inline_table, Resource + +table = load_inline_table(Resource(data=[ + ["id", "name", "native"], + [1, "english", "English"], + [2, "chinese", "\u4e2d\u6587"], + [3, "spanish", "Espa\u00f1ol"], +])) +``` + +## Advanced Features + +### With Table Schema + +Provide a Table Schema for type validation and conversion: + +```python +from fairspec import load_inline_table, Resource +from fairspec_metadata import TableSchema, 
IntegerColumnProperty, StringColumnProperty, BooleanColumnProperty + +table = load_inline_table(Resource( + data=[ + {"id": 1, "name": "english", "active": True}, + {"id": 2, "name": "chinese", "active": False}, + ], + tableSchema=TableSchema(properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + "active": BooleanColumnProperty(), + }), +)) +``` + +### Mixed with File Data + +Inline data can be used alongside file-based resources in datasets: + +```python +from fairspec import load_inline_table, load_csv_table, Resource + +# Load inline reference data +languages = load_inline_table(Resource( + name="languages", + data=[ + {"id": 1, "name": "english"}, + {"id": 2, "name": "chinese"}, + ], +)) + +# Load main data from file +users = load_csv_table(Resource(name="users", data="users.csv")) +``` + +### Resource Metadata + +You can include metadata with inline data resources: + +```python +from fairspec import load_inline_table, Resource +from fairspec_metadata import TableSchema, StringColumnProperty + +table = load_inline_table(Resource( + name="countries", + title="Country Reference Data", + description="ISO country codes and names", + data=[ + {"code": "US", "name": "United States"}, + {"code": "CN", "name": "China"}, + {"code": "ES", "name": "Spain"}, + ], + tableSchema=TableSchema(properties={ + "code": StringColumnProperty(), + "name": StringColumnProperty(), + }), +)) +``` diff --git a/website/content/docs/python/json.md b/website/content/docs/python/json.md new file mode 100644 index 0000000..472c511 --- /dev/null +++ b/website/content/docs/python/json.md @@ -0,0 +1,133 @@ +--- +title: Working with JSON tables in Python +sidebar: + label: JSON + order: 3 +--- + +JSON file handling with automatic format detection and high-performance data operations. 
+ +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The JSON plugin provides: + +- `load_json_table` - Load JSON files into tables +- `save_json_table` - Save tables to JSON files +- `JsonPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_json_table, Resource + +table = load_json_table(Resource(data="table.json")) +# Standard JSON array of objects format +``` + +## Basic Usage + +### Loading JSON Files + +```python +from fairspec import load_json_table, Resource + +# Load from local file +table = load_json_table(Resource(data="data.json")) + +# Load from remote URL +table = load_json_table(Resource(data="https://example.com/data.json")) + +# Load multiple files (concatenated) +table = load_json_table(Resource(data=["file1.json", "file2.json"])) +``` + +### Saving JSON Files + +```python +from fairspec import save_json_table +from fairspec_metadata import JsonFileDialect + +# Save with default options +save_json_table(table, path="output.json") + +# Save with explicit format +save_json_table(table, path="output.json", fileDialect=JsonFileDialect()) +``` + +## Standard Format + +JSON tables use an array of objects format: + +```json +[ + {"id": 1, "name": "Alice", "age": 30}, + {"id": 2, "name": "Bob", "age": 25} +] +``` + +## Advanced Features + +### JSON Pointer Extraction + +Extract data from nested objects using `jsonPointer`: + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Input: {"users": [{"id": 1, "name": "Alice"}]} +table = load_json_table(Resource( + data="data.json", + fileDialect=JsonFileDialect(jsonPointer="users"), +)) +``` + +### Column Selection + +Select specific columns using `columnNames`: + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Only load specific columns +table = load_json_table(Resource( + data="data.json", + 
fileDialect=JsonFileDialect(columnNames=["name", "age"]), +)) +``` + +### Array Format Handling + +Handle CSV-style array data with `rowType: "array"`: + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Input: [["id", "name"], [1, "Alice"], [2, "Bob"]] +table = load_json_table(Resource( + data="data.json", + fileDialect=JsonFileDialect(rowType="array"), +)) +``` + +### Saving with JSON Pointer + +Wrap data in a nested structure when saving: + +```python +from fairspec import save_json_table +from fairspec_metadata import JsonFileDialect + +# Output: {"users": [{"id": 1, "name": "Alice"}]} +save_json_table(table, path="output.json", fileDialect=JsonFileDialect( + jsonPointer="users", +)) +``` diff --git a/website/content/docs/python/jsonl.md b/website/content/docs/python/jsonl.md new file mode 100644 index 0000000..831b450 --- /dev/null +++ b/website/content/docs/python/jsonl.md @@ -0,0 +1,123 @@ +--- +title: Working with JSONL tables in Python +sidebar: + label: JSONL + order: 4 +--- + +JSONL (JSON Lines) file handling with automatic format detection and high-performance data operations. 
+ +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The JSONL format is handled by the JSON plugin, which provides: + +- `load_json_table` - Load JSONL files into tables +- `save_json_table` - Save tables to JSONL files +- `JsonPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_json_table, Resource + +table = load_json_table(Resource(data="table.jsonl")) +# Newline-delimited JSON objects +``` + +## Basic Usage + +### Loading JSONL Files + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Load from local file +table = load_json_table(Resource(data="data.jsonl")) + +# Load with explicit format +table = load_json_table(Resource( + data="data.jsonl", + fileDialect=JsonFileDialect(format="jsonl"), +)) + +# Load multiple files (concatenated) +table = load_json_table(Resource(data=["part1.jsonl", "part2.jsonl"])) +``` + +### Saving JSONL Files + +```python +from fairspec import save_json_table +from fairspec_metadata import JsonFileDialect + +# Save as JSONL +save_json_table(table, path="output.jsonl", fileDialect=JsonFileDialect(format="jsonl")) +``` + +## Standard Format + +JSONL uses newline-delimited JSON objects: + +```jsonl +{"id": 1, "name": "Alice", "age": 30} +{"id": 2, "name": "Bob", "age": 25} +{"id": 3, "name": "Charlie", "age": 35} +``` + +## Advanced Features + +### Column Selection + +Select specific columns using `columnNames`: + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Only load specific columns +table = load_json_table(Resource( + data="data.jsonl", + fileDialect=JsonFileDialect(format="jsonl", columnNames=["name", "age"]), +)) +``` + +### Array Format Handling + +Handle CSV-style array data with `rowType: "array"`: + +```python +from fairspec import load_json_table, Resource +from fairspec_metadata import JsonFileDialect + +# Input JSONL with 
arrays: +# ["id", "name"] +# [1, "Alice"] +# [2, "Bob"] + +table = load_json_table(Resource( + data="data.jsonl", + fileDialect=JsonFileDialect(format="jsonl", rowType="array"), +)) +``` + +### Remote File Loading + +```python +from fairspec import load_json_table, Resource + +# Load from URL +table = load_json_table(Resource(data="https://example.com/data.jsonl")) + +# Load multiple remote files +table = load_json_table(Resource(data=[ + "https://api.example.com/logs-2023.jsonl", + "https://api.example.com/logs-2024.jsonl", +])) +``` diff --git a/website/content/docs/python/jupyter.md b/website/content/docs/python/jupyter.md new file mode 100644 index 0000000..32ba6da --- /dev/null +++ b/website/content/docs/python/jupyter.md @@ -0,0 +1,27 @@ +--- +title: Using Fairspec Python in Jupyter Notebooks +sidebar: + label: Jupyter Notebooks + order: 12 +--- + +For data scientists and data engineers, [Jupyter Notebooks](https://docs.jupyter.org/en/latest/) provide a powerful and flexible environment for exploring, visualizing, and analyzing data. + +## Installation + +1. **Install Jupyter:** `pip install jupyterlab` - Installs Jupyter Notebook, a web-based interactive computing environment for data science and data engineering. You can use another UI such as Jupyter CLI or Jupyter Desktop. + +1. **Install Fairspec:** `pip install fairspec` - Installs the Fairspec Python framework and all its dependencies. + +## Usage + +1. **Run Jupyter Notebooks:** `jupyter-lab` - Launches the Jupyter Notebook server in the current working directory, which allows you to create and run Jupyter notebooks. + +1. **Select Python Kernel in Notebook:** Choose the Python kernel from your notebook's kernel selection menu. VS Code users may need to install the default Jupyter kernel extensions. 
+ +![Fairspec Python in Jupyter Notebooks](./assets/jupyter.png) + +## References + +- [Jupyter Documentation](https://docs.jupyter.org/en/latest/) +- [JupyterLab Documentation](https://jupyterlab.readthedocs.io/en/latest/) diff --git a/website/content/docs/python/ods.md b/website/content/docs/python/ods.md new file mode 100644 index 0000000..0775606 --- /dev/null +++ b/website/content/docs/python/ods.md @@ -0,0 +1,162 @@ +--- +title: Working with ODS in Python +sidebar: + label: ODS + order: 6 +--- + +OpenDocument Spreadsheet (ODS) file handling with sheet selection, advanced header processing, and high-performance data operations. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +ODS format is handled by the XLSX plugin, which provides: + +- `load_xlsx_table` - Load ODS files into tables +- `save_xlsx_table` - Save tables to ODS files +- `XlsxPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_xlsx_table, Resource + +table = load_xlsx_table(Resource(data="table.ods")) +# the column types will be automatically inferred +``` + +## Basic Usage + +### Loading ODS Files + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Load a simple ODS file +table = load_xlsx_table(Resource(data="data.ods")) + +# Load with custom format (specify sheet) +table = load_xlsx_table(Resource( + data="data.ods", + fileDialect=XlsxFileDialect(format="ods", sheetName="Sheet2"), +)) + +# Load multiple ODS files (concatenated) +table = load_xlsx_table(Resource(data=["part1.ods", "part2.ods", "part3.ods"])) +``` + +### Saving ODS Files + +```python +from fairspec import save_xlsx_table +from fairspec_metadata import XlsxFileDialect + +# Save with default options +save_xlsx_table(table, path="output.ods", fileDialect=XlsxFileDialect(format="ods")) + +# Save with custom sheet name +save_xlsx_table(table, path="output.ods", fileDialect=XlsxFileDialect( + 
format="ods", + sheetName="Data", +)) +``` + +## Advanced Features + +### Sheet Selection + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Select by sheet number (1-indexed) +table = load_xlsx_table(Resource( + data="workbook.ods", + fileDialect=XlsxFileDialect(format="ods", sheetNumber=2), +)) + +# Select by sheet name +table = load_xlsx_table(Resource( + data="workbook.ods", + fileDialect=XlsxFileDialect(format="ods", sheetName="Sales Data"), +)) +``` + +### Multi-Header Row Processing + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# ODS with multiple header rows +table = load_xlsx_table(Resource( + data="multi-header.ods", + fileDialect=XlsxFileDialect( + format="ods", + headerRows=[1, 2], + headerJoin="_", + ), +)) +# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"] +``` + +### Comment Row Handling + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Skip specific comment rows +table = load_xlsx_table(Resource( + data="with-comments.ods", + fileDialect=XlsxFileDialect( + format="ods", + commentRows=[1, 2], + headerRows=[3], + ), +)) + +# Skip rows with comment prefix +table = load_xlsx_table(Resource( + data="data.ods", + fileDialect=XlsxFileDialect( + format="ods", + commentPrefix="#", + headerRows=[1], + ), +)) +``` + +### Remote File Loading + +```python +from fairspec import load_xlsx_table, Resource + +# Load from URL +table = load_xlsx_table(Resource(data="https://example.com/data.ods")) + +# Load multiple remote files +table = load_xlsx_table(Resource(data=[ + "https://api.example.com/data-2023.ods", + "https://api.example.com/data-2024.ods", +])) +``` + +### Column Selection + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Select specific columns +table = 
load_xlsx_table(Resource( + data="data.ods", + fileDialect=XlsxFileDialect(format="ods", columnNames=["name", "age", "city"]), +)) +``` diff --git a/website/content/docs/python/parquet.md b/website/content/docs/python/parquet.md new file mode 100644 index 0000000..2be9671 --- /dev/null +++ b/website/content/docs/python/parquet.md @@ -0,0 +1,74 @@ +--- +title: Working with Parquet in Python +sidebar: + label: Parquet + order: 8 +--- + +Apache Parquet file handling with high-performance columnar data processing and compression. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The Parquet plugin provides: + +- `load_parquet_table` - Load Parquet files into tables +- `save_parquet_table` - Save tables to Parquet files +- `ParquetPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_parquet_table, Resource + +table = load_parquet_table(Resource(data="table.parquet")) +# Efficient columnar format with compression +``` + +## Basic Usage + +### Loading Parquet Files + +```python +from fairspec import load_parquet_table, Resource + +# Load from local file +table = load_parquet_table(Resource(data="data.parquet")) + +# Load from remote URL +table = load_parquet_table(Resource(data="https://example.com/data.parquet")) + +# Load multiple files (concatenated) +table = load_parquet_table(Resource(data=["file1.parquet", "file2.parquet"])) +``` + +### Saving Parquet Files + +```python +from fairspec import save_parquet_table + +# Save with default options +save_parquet_table(table, path="output.parquet") +``` + +## Advanced Features + +### Remote File Loading + +```python +from fairspec import load_parquet_table, Resource + +# Load from URL +table = load_parquet_table(Resource(data="https://example.com/data.parquet")) + +# Load multiple remote files +table = load_parquet_table(Resource(data=[ + "https://api.example.com/data-2023.parquet", + "https://api.example.com/data-2024.parquet", +])) +``` diff 
--git a/website/content/docs/python/sqlite.md b/website/content/docs/python/sqlite.md new file mode 100644 index 0000000..1677f41 --- /dev/null +++ b/website/content/docs/python/sqlite.md @@ -0,0 +1,122 @@ +--- +title: Working with SQLite in Python +sidebar: + label: SQLite + order: 9 +--- + +SQLite database file handling with table loading and saving capabilities. + +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The SQLite plugin provides: + +- `load_sqlite_table` - Load tables from SQLite databases +- `save_sqlite_table` - Save tables to SQLite databases +- `SqlitePlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_sqlite_table, Resource +from fairspec_metadata import SqliteFileDialect + +table = load_sqlite_table(Resource( + data="database.db", + fileDialect=SqliteFileDialect(tableName="users"), +)) +# column types will be automatically inferred from database schema +``` + +## Basic Usage + +### Loading SQLite Tables + +```python +from fairspec import load_sqlite_table, Resource +from fairspec_metadata import SqliteFileDialect + +# Load a table from SQLite database +table = load_sqlite_table(Resource( + data="data.db", + fileDialect=SqliteFileDialect(tableName="products"), +)) + +# Load from a specific path +table = load_sqlite_table(Resource( + data="/path/to/database.db", + fileDialect=SqliteFileDialect(tableName="orders"), +)) +``` + +### Saving SQLite Tables + +```python +from fairspec import save_sqlite_table +from fairspec_metadata import SqliteFileDialect + +# Save table to SQLite database +save_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( + tableName="results", +)) + +# Overwrite existing table +save_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( + tableName="results", +), overwrite=True) +``` + +## Advanced Features + +### Schema Inference + +Table schemas are automatically inferred from SQLite table definitions: + +```python 
+from fairspec import load_sqlite_table, Resource +from fairspec_metadata import SqliteFileDialect + +# Field types are automatically detected from database schema +table = load_sqlite_table(Resource( + data="shop.db", + fileDialect=SqliteFileDialect(tableName="products"), +)) +# Types like INTEGER, TEXT, REAL are mapped to appropriate Table Schema types +``` + +### Creating New Tables + +When saving, the plugin automatically creates the table structure: + +```python +from fairspec import save_sqlite_table +from fairspec_metadata import SqliteFileDialect + +# Creates a new database file with the specified table +save_sqlite_table(table, path="new-database.db", fileDialect=SqliteFileDialect( + tableName="my_data", +)) +``` + +### Working with Table Schema + +You can provide a custom Table Schema when saving: + +```python +from fairspec import save_sqlite_table +from fairspec_metadata import SqliteFileDialect, TableSchema, IntegerColumnProperty, StringColumnProperty + +save_sqlite_table(table, path="output.db", fileDialect=SqliteFileDialect( + tableName="customers", +), tableSchema=TableSchema(properties={ + "id": IntegerColumnProperty(), + "name": StringColumnProperty(), + "email": StringColumnProperty(), +})) +``` diff --git a/website/content/docs/python/table.md b/website/content/docs/python/table.md new file mode 100644 index 0000000..8474bfe --- /dev/null +++ b/website/content/docs/python/table.md @@ -0,0 +1,206 @@ +--- +title: Working with Tabular Data in Python +sidebar: + label: Tabular Data + order: 11 +--- + +High-performance data processing and schema validation for tabular data built on **Polars** (a Rust-based DataFrame library). 
+ +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The table package provides core utilities for working with tabular data: + +- `normalize_table` - Convert table data to match a schema +- `denormalize_table` - Convert normalized data back to raw format +- `infer_table_schema_from_table` - Automatically infer schema from table data +- `inspect_table` - Get table structure information +- `query_table` - Query tables using SQL-like syntax + +For example: + +```python +from fairspec import load_csv_table, infer_table_schema_from_table, Resource + +table = load_csv_table(Resource(data="data.csv")) +schema = infer_table_schema_from_table(table) +``` + +## Basic Usage + +### Schema Inference + +Automatically infer Table Schema from data: + +```python +import polars as pl +from fairspec import infer_table_schema_from_table + +table = pl.DataFrame({ + "id": ["1", "2", "3"], + "price": ["10.50", "25.00", "15.75"], + "date": ["2023-01-15", "2023-02-20", "2023-03-25"], + "active": ["true", "false", "true"], +}).lazy() + +schema = infer_table_schema_from_table(table, sample_rows=100, confidence=0.9) + +# Result: automatically detected integer, number, date, and boolean types +``` + +### Table Normalization + +Convert table data to match a Table Schema (type conversion): + +```python +import polars as pl +from fairspec import normalize_table +from fairspec_metadata import TableSchema, IntegerColumnProperty, NumberColumnProperty, BooleanColumnProperty, DateColumnProperty + +table = pl.DataFrame({ + "id": ["1", "2", "3"], + "price": ["10.50", "25.00", "15.75"], + "active": ["true", "false", "true"], + "date": ["2023-01-15", "2023-02-20", "2023-03-25"], +}).lazy() + +schema = TableSchema(properties={ + "id": IntegerColumnProperty(), + "price": NumberColumnProperty(), + "active": BooleanColumnProperty(), + "date": DateColumnProperty(), +}) + +normalized = normalize_table(table, schema) +result = normalized.collect() + +# Result has properly typed 
columns: +# { id: 1, price: 10.50, active: True, date: Date("2023-01-15") } +``` + +### Table Denormalization + +Convert normalized data back to raw format (for saving): + +```python +from fairspec import denormalize_table + +denormalized = denormalize_table(table, schema, native_types=["string", "number", "boolean"]) +``` + +## Advanced Features + +### Working with Table Schema + +Define schemas with column properties and constraints: + +```python +from fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty + +schema = TableSchema( + properties={ + "id": IntegerColumnProperty(minimum=1), + "name": StringColumnProperty(minLength=1, maxLength=100), + "email": StringColumnProperty(pattern=r"^[^@]+@[^@]+\.[^@]+$"), + "age": IntegerColumnProperty(minimum=0, maximum=150), + "status": StringColumnProperty(enum=["active", "inactive", "pending"]), + }, + required=["id", "name", "email"], + primaryKey=["id"], +) +``` + +### Schema Inference Options + +Customize how schemas are inferred: + +```python +from fairspec import infer_table_schema_from_table + +schema = infer_table_schema_from_table( + table, + sample_rows=100, + confidence=0.9, + keep_strings=False, + column_types={"id": "integer", "status": "categorical"}, +) +``` + +### Handling Missing Values + +Define missing value indicators: + +```python +from fairspec_metadata import TableSchema, NumberColumnProperty + +schema = TableSchema( + properties={"value": NumberColumnProperty()}, + missingValues=["", "N/A", "null", -999], +) +``` + +### Primary Keys and Constraints + +Define table-level constraints: + +```python +from fairspec_metadata import TableSchema, IntegerColumnProperty, StringColumnProperty, UniqueKey + +schema = TableSchema( + properties={ + "user_id": IntegerColumnProperty(), + "email": StringColumnProperty(), + }, + primaryKey=["user_id"], + uniqueKeys=[UniqueKey(columnNames=["email"])], +) +``` + +## Supported Column Types + +### Primitive Types +- `string` - Text data +- 
`integer` - Whole numbers +- `number` - Decimal numbers +- `boolean` - True/false values + +### Temporal Types +- `date` - Calendar dates +- `datetime` - Date and time +- `time` - Time of day +- `duration` - Time spans + +### Spatial Types +- `geojson` - GeoJSON geometries +- `wkt` - Well-Known Text geometries +- `wkb` - Well-Known Binary geometries + +### Complex Types +- `array` - Fixed-length arrays +- `list` - Variable-length lists +- `object` - JSON objects + +### Specialized Types +- `email` - Email addresses +- `url` - URLs +- `categorical` - Categorical data +- `base64` - Base64 encoded data +- `hex` - Hexadecimal data + +## Table Type + +The package uses `LazyFrame` from Polars for efficient processing: + +```python +import polars as pl +from fairspec_table import Table + +# Table is an alias for pl.LazyFrame +table: Table = pl.DataFrame({"id": [1, 2, 3]}).lazy() +``` diff --git a/website/content/docs/python/tsv.md b/website/content/docs/python/tsv.md new file mode 100644 index 0000000..a4385b2 --- /dev/null +++ b/website/content/docs/python/tsv.md @@ -0,0 +1,128 @@ +--- +title: Working with TSV in Python +sidebar: + label: TSV + order: 2 +--- +Tab-separated values (TSV) file handling with automatic format detection and high-performance data operations. 
+ +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The TSV format is handled by the CSV plugin, which provides: + +- `load_csv_table` - Load TSV files into tables +- `save_csv_table` - Save tables to TSV files +- `CsvPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_csv_table, Resource + +table = load_csv_table(Resource(data="table.tsv")) +# the column types will be automatically inferred +``` + +## Basic Usage + +### Loading TSV Files + +```python +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +# Load a simple TSV file +table = load_csv_table(Resource(data="data.tsv")) + +# Load with explicit format +table = load_csv_table(Resource( + data="data.tsv", + fileDialect=CsvFileDialect(delimiter="\t", headerRows=[1]), +)) + +# Load multiple TSV files (concatenated) +table = load_csv_table(Resource(data=["part1.tsv", "part2.tsv", "part3.tsv"])) +``` + +### Saving TSV Files + +```python +from fairspec import save_csv_table +from fairspec_metadata import CsvFileDialect + +# Save with default options +save_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect(delimiter="\t")) + +# Save with line terminator option +save_csv_table(table, path="output.tsv", fileDialect=CsvFileDialect( + delimiter="\t", + lineTerminator="\r\n", +)) +``` + +## Advanced Features + +### Multi-Header Row Processing + +```python +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +# TSV with multiple header rows +table = load_csv_table(Resource( + data="multi-header.tsv", + fileDialect=CsvFileDialect( + delimiter="\t", + headerRows=[1, 2], + headerJoin="_", + ), +)) +``` + +### Comment Handling + +```python +from fairspec import load_csv_table, Resource +from fairspec_metadata import CsvFileDialect + +# TSV with comment lines +table = load_csv_table(Resource( + data="with-comments.tsv", + fileDialect=CsvFileDialect( + 
delimiter="\t", + commentPrefix="#", + headerRows=[1], + ), +)) + +# Or specify specific comment row numbers +table = load_csv_table(Resource( + data="with-comments.tsv", + fileDialect=CsvFileDialect( + delimiter="\t", + commentRows=[1, 2], + headerRows=[3], + ), +)) +``` + +### Remote File Loading + +```python +from fairspec import load_csv_table, Resource + +# Load from URL +table = load_csv_table(Resource(data="https://example.com/data.tsv")) + +# Load multiple remote files +table = load_csv_table(Resource(data=[ + "https://api.example.com/data-2023.tsv", + "https://api.example.com/data-2024.tsv", +])) +``` diff --git a/website/content/docs/python/xlsx.md b/website/content/docs/python/xlsx.md new file mode 100644 index 0000000..1da1ab1 --- /dev/null +++ b/website/content/docs/python/xlsx.md @@ -0,0 +1,156 @@ +--- +title: Working with XLSX in Python +sidebar: + label: XLSX + order: 5 +--- + +Excel (.xlsx) file handling with sheet selection, advanced header processing, and high-performance data operations. 
+ +## Installation + +```bash +pip install fairspec +``` + +## Getting Started + +The XLSX plugin provides: + +- `load_xlsx_table` - Load Excel files into tables +- `save_xlsx_table` - Save tables to Excel files +- `XlsxPlugin` - Plugin for framework integration + +For example: + +```python +from fairspec import load_xlsx_table, Resource + +table = load_xlsx_table(Resource(data="table.xlsx")) +# the column types will be automatically inferred +``` + +## Basic Usage + +### Loading XLSX Files + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Load a simple XLSX file +table = load_xlsx_table(Resource(data="data.xlsx")) + +# Load with custom format (specify sheet) +table = load_xlsx_table(Resource( + data="data.xlsx", + fileDialect=XlsxFileDialect(sheetName="Sheet2"), +)) + +# Load multiple XLSX files (concatenated) +table = load_xlsx_table(Resource(data=["part1.xlsx", "part2.xlsx", "part3.xlsx"])) +``` + +### Saving XLSX Files + +```python +from fairspec import save_xlsx_table +from fairspec_metadata import XlsxFileDialect + +# Save with default options +save_xlsx_table(table, path="output.xlsx") + +# Save with custom sheet name +save_xlsx_table(table, path="output.xlsx", fileDialect=XlsxFileDialect(sheetName="Data")) +``` + +## Advanced Features + +### Sheet Selection + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Select by sheet number (1-indexed) +table = load_xlsx_table(Resource( + data="workbook.xlsx", + fileDialect=XlsxFileDialect(sheetNumber=2), +)) + +# Select by sheet name +table = load_xlsx_table(Resource( + data="workbook.xlsx", + fileDialect=XlsxFileDialect(sheetName="Sales Data"), +)) +``` + +### Multi-Header Row Processing + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# XLSX with multiple header rows +table = load_xlsx_table(Resource( + data="multi-header.xlsx", + 
fileDialect=XlsxFileDialect( + headerRows=[1, 2], + headerJoin="_", + ), +)) +# Resulting columns: ["Year_Quarter", "2023_Q1", "2023_Q2", "2024_Q1", "2024_Q2"] +``` + +### Comment Row Handling + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Skip specific comment rows +table = load_xlsx_table(Resource( + data="with-comments.xlsx", + fileDialect=XlsxFileDialect( + commentRows=[1, 2], + headerRows=[3], + ), +)) + +# Skip rows with comment prefix +table = load_xlsx_table(Resource( + data="data.xlsx", + fileDialect=XlsxFileDialect( + commentPrefix="#", + headerRows=[1], + ), +)) +``` + +### Remote File Loading + +```python +from fairspec import load_xlsx_table, Resource + +# Load from URL +table = load_xlsx_table(Resource(data="https://example.com/data.xlsx")) + +# Load multiple remote files +table = load_xlsx_table(Resource(data=[ + "https://api.example.com/data-2023.xlsx", + "https://api.example.com/data-2024.xlsx", +])) +``` + +### Column Selection + +```python +from fairspec import load_xlsx_table, Resource +from fairspec_metadata import XlsxFileDialect + +# Select specific columns +table = load_xlsx_table(Resource( + data="data.xlsx", + fileDialect=XlsxFileDialect(columnNames=["name", "age", "city"]), +)) +``` diff --git a/website/content/docs/terminal/data.md b/website/content/docs/terminal/data.md new file mode 100644 index 0000000..65b08c9 --- /dev/null +++ b/website/content/docs/terminal/data.md @@ -0,0 +1,432 @@ +--- +title: Working with JSON Data in Terminal +sidebar: + order: 3 + label: Data +--- + +JSON data validation and schema operations using JSON Schema standards. 
+ +## Available Commands + +The `fairspec data` command provides utilities for working with JSON data: + +- `validate` - Validate JSON data against a Data Schema (JSON Schema) +- `infer-schema` - Automatically generate a Data Schema from JSON data +- `validate-schema` - Validate a Data Schema itself +- `infer-dialect` - Infer file dialect + +## Validate JSON Data + +Validate JSON data files against a Data Schema (JSON Schema): + +```bash +# Validate JSON data with a schema +fairspec data validate data.json --schema schema.json + +# Validate from a remote source +fairspec data validate https://example.com/data.json --schema schema.json + +# Output validation report as JSON +fairspec data validate data.json --schema schema.json --json +``` + +### Options + +- `--schema ` (required) - Path to a Data Schema descriptor (JSON Schema) +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Validation Report + +Returns a validation report with: +- `valid` - Boolean indicating if validation passed +- `errors` - Array of validation errors (if any) + +Example validation errors: +```json +{ + "valid": false, + "errors": [ + { + "type": "data", + "instancePath": "/users/0/email", + "schemaPath": "#/properties/users/items/properties/email/format", + "keyword": "format", + "message": "must match format \"email\"" + } + ] +} +``` + +### Example Usage + +Create a JSON Schema file (`user-schema.json`): +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "name": { "type": "string" }, + "email": { "type": "string", "format": "email" }, + "age": { "type": "integer", "minimum": 0 } + }, + "required": ["name", "email"] +} +``` + +Validate data against the schema: +```bash +fairspec data validate user.json --schema user-schema.json +``` + +## Infer Data Schema + +Automatically generate a Data Schema (JSON Schema) from JSON data: + +```bash +# Infer schema from local file 
+fairspec data infer-schema data.json + +# Infer schema from remote file +fairspec data infer-schema https://example.com/data.json + +# Save inferred schema to file +fairspec data infer-schema data.json --json > schema.json + +# Output for human reading +fairspec data infer-schema data.json +``` + +### Options + +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Generated Schema + +The inferred schema will automatically detect: +- Data types (string, number, integer, boolean, null) +- Object structures and nested properties +- Array items and their types +- Required properties based on presence +- Enum values for properties with limited options + +### Example + +Given this JSON data (`users.json`): +```json +[ + { + "id": 1, + "name": "Alice", + "email": "alice@example.com", + "age": 30, + "active": true + }, + { + "id": 2, + "name": "Bob", + "email": "bob@example.com", + "age": 25, + "active": false + } +] +``` + +Infer the schema: +```bash +fairspec data infer-schema users.json --json +``` + +Generated schema: +```json +{ + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { "type": "integer" }, + "name": { "type": "string" }, + "email": { "type": "string" }, + "age": { "type": "integer" }, + "active": { "type": "boolean" } + }, + "required": ["id", "name", "email", "age", "active"] + } +} +``` + +## Validate Data Schema + +Validate that a Data Schema (JSON Schema) file is valid: + +```bash +# Validate a schema file +fairspec data validate-schema schema.json + +# Validate from remote source +fairspec data validate-schema https://example.com/schema.json + +# Output as JSON +fairspec data validate-schema schema.json --json +``` + +### Options + +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Schema Validation + +This validates that the schema itself is: +- Valid JSON +- Compliant with JSON Schema Draft 2020-12 
specification +- Has correct property definitions +- Uses valid keywords and formats + +### Validation Report + +```json +{ + "valid": true, + "errors": [] +} +``` + +Or if invalid: +```json +{ + "valid": false, + "errors": [ + { + "type": "schema/invalid", + "message": "Invalid schema property: 'typ' (did you mean 'type'?)" + } + ] +} +``` + +## Infer File Dialect + +Automatically detect the dialect of a data file: + +```bash +# Infer dialect from file +fairspec data infer-dialect data.json + +# Infer from remote file +fairspec data infer-dialect https://example.com/data.jsonl + +# Output as JSON +fairspec data infer-dialect data.json --json +``` + +### Options + +- `--sample-bytes ` - Sample size in bytes for file dialect detection +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Detected Formats + +The command can detect: +- `json` - Standard JSON format +- `jsonl` - JSON Lines (newline-delimited JSON) + +### Example Output + +```json +{ + "name": "json" +} +``` + +Or for JSONL: +```json +{ + "name": "jsonl" +} +``` + +## Common Workflows + +### Create and Validate with Schema + +```bash +# 1. Infer schema from existing data +fairspec data infer-schema sample-data.json --json > data-schema.json + +# 2. Validate new data against the schema +fairspec data validate new-data.json --schema data-schema.json + +# 3. Check if validation passed +if [ $? -eq 0 ]; then + echo "Data is valid!" +else + echo "Data validation failed" +fi +``` + +### Schema-Driven Development + +```bash +# 1. 
Create a schema for your data structure +cat > api-schema.json << 'EOF' +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { "type": "integer" }, + "username": { "type": "string", "minLength": 3 }, + "email": { "type": "string", "format": "email" } + }, + "required": ["id", "username", "email"] + } + } + } +} +EOF + +# 2. Validate the schema itself +fairspec data validate-schema api-schema.json + +# 3. Validate API responses against the schema +fairspec data validate response.json --schema api-schema.json +``` + +### Automated Testing + +```bash +# Validate data in a test script +for file in test-data/*.json; do + echo "Validating $file..." + if fairspec data validate "$file" --schema schema.json --silent; then + echo "✓ $file is valid" + else + echo "✗ $file failed validation" + exit 1 + fi +done +``` + +## Output Formats + +### Text Output (default) + +Human-readable output with colors and formatting: + +```bash +fairspec data validate data.json --schema schema.json +``` + +Output: +``` +✓ Data is valid +``` + +Or with errors: +``` +✗ Data validation failed + +Errors: + • /users/0/email: must match format "email" + • /users/1/age: must be >= 0 +``` + +### JSON Output + +Machine-readable JSON for automation and scripting: + +```bash +fairspec data validate data.json --schema schema.json --json +``` + +### Silent Mode + +Suppress all output except errors: + +```bash +fairspec data validate data.json --schema schema.json --silent +``` + +Use exit code to check success: +```bash +if fairspec data validate data.json --schema schema.json --silent; then + echo "Valid" +else + echo "Invalid" +fi +``` + +## Examples + +### API Response Validation + +```bash +# Fetch API response and validate +curl -s https://api.example.com/users > response.json +fairspec data infer-schema response.json --json > api-schema.json + +# Validate future 
responses +curl -s https://api.example.com/users | \ + fairspec data validate /dev/stdin --schema api-schema.json +``` + +### Configuration File Validation + +```bash +# Create schema for config files +cat > config-schema.json << 'EOF' +{ + "type": "object", + "properties": { + "host": { "type": "string" }, + "port": { "type": "integer", "minimum": 1, "maximum": 65535 }, + "ssl": { "type": "boolean" } + }, + "required": ["host", "port"] +} +EOF + +# Validate config file +fairspec data validate config.json --schema config-schema.json +``` + +### Data Pipeline Validation + +```bash +# Validate input data +fairspec data validate input.json --schema input-schema.json + +# Process data (your custom script) +./process-data.sh input.json output.json + +# Validate output data +fairspec data validate output.json --schema output-schema.json +``` + +### Schema Evolution + +```bash +# Start with inferred schema from v1 data +fairspec data infer-schema data-v1.json --json > schema-v1.json + +# Manually update schema for v2 (add optional properties) +# Edit schema-v1.json -> schema-v2.json + +# Validate that v2 schema is still valid +fairspec data validate-schema schema-v2.json + +# Ensure v1 data is still compatible with v2 schema +fairspec data validate data-v1.json --schema schema-v2.json +``` diff --git a/website/content/docs/terminal/dataset.md b/website/content/docs/terminal/dataset.md new file mode 100644 index 0000000..14206a0 --- /dev/null +++ b/website/content/docs/terminal/dataset.md @@ -0,0 +1,584 @@ +--- +title: Working with Datasets in Terminal +sidebar: + order: 1 + label: Dataset +--- + +Dataset operations for managing collections of tabular resources with metadata and schemas. 
+ +## Available Commands + +The `fairspec dataset` command provides utilities for working with datasets: + +- `infer` - Automatically infer a dataset descriptor from data files +- `copy` - Copy datasets to a local folder +- `validate` - Validate dataset descriptors and their resources +- `list` - List resources in a dataset +- `script` - Interactive REPL session with loaded dataset + +## What is a Dataset? + +A dataset is a collection of related data resources (tables) with: +- Metadata describing the dataset (title, description, license, etc.) +- Resource definitions for each table (path, format, schema) +- Table Schemas defining the structure of each resource +- Relationships and foreign keys between resources + +Datasets use JSON descriptor files (often named `dataset.json`) following the Fairspec specification. + +## Infer Dataset + +Automatically generate a dataset descriptor from data files: + +```bash +# Infer from single file +fairspec dataset infer data.csv + +# Infer from multiple files +fairspec dataset infer users.csv products.csv orders.csv + +# Infer with remote files +fairspec dataset infer https://example.com/data1.csv data2.csv + +# Save to descriptor file +fairspec dataset infer *.csv --json > dataset.json +``` + +### Inference Process + +The infer command automatically: +1. Detects format for each file (CSV, JSON, Excel, etc.) +2. Infers Table Schema for each resource +3. Generates resource names from file names +4. 
Creates a complete dataset descriptor + +### Options + +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Format Options + +Format detection and schema inference can be customized: + +- `--delimiter ` - CSV delimiter +- `--header-rows ` - Header row indices (JSON array) +- `--sample-rows ` - Sample size for schema inference +- `--confidence ` - Confidence threshold for type detection +- `--column-types ` - Override types for specific columns +- `--keep-strings` - Keep original string types +- `--comma-decimal` - Treat comma as decimal separator +- `--month-first` - Parse dates as month-first + +### Generated Descriptor + +Example generated dataset descriptor: + +```json +{ + "resources": [ + { + "name": "users", + "data": "users.csv", + "format": { + "name": "csv", + "delimiter": "," + }, + "tableSchema": { + "properties": { + "id": { "type": "integer" }, + "name": { "type": "string" }, + "email": { "type": "string" }, + "created_at": { "type": "date" } + }, + "required": ["id", "name", "email"] + } + }, + { + "name": "orders", + "data": "orders.csv", + "format": { + "name": "csv" + }, + "tableSchema": { + "properties": { + "order_id": { "type": "integer" }, + "user_id": { "type": "integer" }, + "amount": { "type": "number" }, + "status": { "type": "string" } + } + } + } + ] +} +``` + +## Copy Dataset + +Copy a dataset and all its resources to a local folder: + +```bash +# Copy dataset to local folder +fairspec dataset copy dataset.json --to-path ./local-dataset + +# Copy remote dataset +fairspec dataset copy https://example.com/dataset.json --to-path ./dataset + +# Silent mode for automation +fairspec dataset copy dataset.json --to-path ./output --silent +``` + +### Copy Behavior + +The copy command: +- Downloads all remote resources +- Preserves directory structure +- Updates resource paths in the descriptor to point to local files +- Creates the target directory if it doesn't exist +- Saves the updated descriptor to the target location + 
+### Options
+
+- `--to-path <path>` (required) - Target directory path
+- `--silent` - Suppress output messages
+- `--debug` - Show debug information
+- `--json` - Output as JSON
+
+### Example
+
+Given a dataset with remote resources:
+```json
+{
+  "resources": [
+    {
+      "name": "users",
+      "data": "https://example.com/data/users.csv"
+    },
+    {
+      "name": "products",
+      "data": "https://example.com/data/products.csv"
+    }
+  ]
+}
+```
+
+After copying:
+```bash
+fairspec dataset copy dataset.json --to-path ./local
+```
+
+Results in:
+```
+./local/
+  dataset.json    # Updated descriptor
+  users.csv       # Downloaded resource
+  products.csv    # Downloaded resource
+```
+
+## Validate Dataset
+
+Validate a dataset descriptor and all its resources:
+
+```bash
+# Validate local dataset
+fairspec dataset validate dataset.json
+
+# Validate remote dataset
+fairspec dataset validate https://example.com/dataset.json
+
+# Output validation report as JSON
+fairspec dataset validate dataset.json --json
+```
+
+### Validation Checks
+
+The validate command checks:
+- **Descriptor validity** - Valid JSON and conforms to the Fairspec specification
+- **Resource existence** - All referenced resources can be loaded
+- **Schema validation** - Each resource validates against its Table Schema
+- **Referential integrity** - Foreign key relationships are valid
+- **Format compliance** - Resources match their declared formats
+
+### Validation Report
+
+Returns a validation report with:
+- `valid` - Boolean indicating if validation passed
+- `errors` - Array of validation errors (if any)
+
+Example validation errors:
+```json
+{
+  "valid": false,
+  "errors": [
+    {
+      "type": "dataset/resource-not-found",
+      "resourceName": "users",
+      "message": "Resource file 'users.csv' not found"
+    },
+    {
+      "type": "table/schema",
+      "resourceName": "orders",
+      "rowNumber": 15,
+      "propertyName": "amount",
+      "message": "value must be a number"
+    },
+    {
+      "type": "dataset/foreign-key",
+      "resourceName": "orders",
+      "message": "Foreign 
key 'user_id' references non-existent value in 'users'" + } + ] +} +``` + +### Options + +- `--debug` - Show debug information +- `--json` - Output as JSON + +## List Resources + +List all resources in a dataset: + +```bash +# List resources +fairspec dataset list dataset.json + +# List from remote dataset +fairspec dataset list https://example.com/dataset.json + +# Output as JSON array +fairspec dataset list dataset.json --json +``` + +### Output + +Returns an array of resource names in the dataset: + +Text output: +``` +users +products +orders +transactions +``` + +JSON output: +```json +["users", "products", "orders", "transactions"] +``` + +### Options + +- `--debug` - Show debug information +- `--json` - Output as JSON + +## Interactive Scripting + +Start an interactive REPL session with a loaded dataset: + +```bash +# Load dataset and start REPL +fairspec dataset script dataset.json + +# Script remote dataset +fairspec dataset script https://example.com/dataset.json +``` + +### Available in Session + +- `fairspec` - Full fairspec library +- `dataset` - Loaded dataset descriptor + +### Example Session + +```python +fairspec> dataset +{ + "resources": [ + {"name": "users", "data": "users.csv", ...}, + {"name": "orders", "data": "orders.csv", ...} + ] +} + +fairspec> len(dataset.resources) +2 + +fairspec> dataset.resources[0].name +'users' + +fairspec> table = fairspec.load_table(dataset.resources[0]) +fairspec> table.head(5).collect() +DataFrame { ... } +``` + +## Common Workflows + +### Create Dataset from Files + +```bash +# 1. Infer dataset from multiple files +fairspec dataset infer data/*.csv --json > dataset.json + +# 2. Manually edit dataset.json to add: +# - Title and description +# - License information +# - Foreign key relationships +# - Additional metadata + +# 3. Validate the dataset +fairspec dataset validate dataset.json + +# 4. List resources to confirm +fairspec dataset list dataset.json +``` + +### Clone Remote Dataset + +```bash +# 1. 
Copy remote dataset locally +fairspec dataset copy https://example.com/dataset.json --to-path ./local-data + +# 2. Validate local copy +fairspec dataset validate ./local-data/dataset.json + +# 3. List resources +fairspec dataset list ./local-data/dataset.json +``` + +### Dataset Quality Assurance + +```bash +# 1. Validate the dataset +fairspec dataset validate dataset.json + +# 2. If validation fails, check individual resources +fairspec table validate --from-dataset dataset.json --from-resource users + +# 3. Inspect resource schemas +fairspec table infer-schema --from-dataset dataset.json --from-resource users + +# 4. Generate schema documentation +fairspec table render-schema schema.json --to-format markdown --to-path docs/users-schema.md +``` + +### Dataset Evolution + +```bash +# 1. Start with existing dataset +fairspec dataset validate old-dataset.json + +# 2. Add new data files +fairspec dataset infer old-data/*.csv new-data/*.csv --json > dataset.json + +# 3. Merge metadata from old descriptor +# (manual step - copy title, license, etc.) + +# 4. Validate updated dataset +fairspec dataset validate dataset.json + +# 5. 
Verify all resources +fairspec dataset list dataset.json +``` + +### Automation and CI/CD + +```bash +#!/bin/bash + +# Validate dataset in CI pipeline +if fairspec dataset validate dataset.json --json | jq -e '.valid'; then + echo "✓ Dataset validation passed" + exit 0 +else + echo "✗ Dataset validation failed" + fairspec dataset validate dataset.json + exit 1 +fi +``` + +## Output Formats + +### Text Output (default) + +Human-readable output with colors and formatting: + +```bash +fairspec dataset list dataset.json +``` + +Output: +``` +users +products +orders +``` + +### JSON Output + +Machine-readable JSON for automation and scripting: + +```bash +fairspec dataset validate dataset.json --json +``` + +### Silent Mode + +Suppress all output except errors (for copy command): + +```bash +fairspec dataset copy dataset.json --to-path ./output --silent +``` + +Use exit code to check success: +```bash +if fairspec dataset copy dataset.json --to-path ./output --silent; then + echo "Success" +else + echo "Failed" +fi +``` + +## Examples + +### Create Multi-Table Dataset + +```bash +# Prepare your data files +# - customers.csv +# - orders.csv +# - products.csv + +# Infer the dataset +fairspec dataset infer customers.csv orders.csv products.csv --json > dataset.json + +# Enhance the descriptor +cat > dataset.json << 'EOF' +{ + "name": "sales-data", + "title": "Sales Database Export", + "description": "Customer orders and product catalog", + "license": "CC-BY-4.0", + "resources": [ + { + "name": "customers", + "data": "customers.csv", + "tableSchema": { "properties": { ... } } + }, + { + "name": "orders", + "data": "orders.csv", + "tableSchema": { + "properties": { ... 
}, + "foreignKeys": [ + { + "columns": ["customer_id"], + "reference": { + "resource": "customers", + "columns": ["id"] + } + } + ] + } + } + ] +} +EOF + +# Validate +fairspec dataset validate dataset.json +``` + +### Download and Validate Public Dataset + +```bash +# Copy public dataset +fairspec dataset copy https://data.example.org/climate/dataset.json \ + --to-path ./climate-data + +# Validate local copy +fairspec dataset validate ./climate-data/dataset.json + +# List available resources +fairspec dataset list ./climate-data/dataset.json + +# Explore specific resource +fairspec table describe --from-dataset ./climate-data/dataset.json \ + --from-resource temperature +``` + +### Dataset Testing + +```bash +# test-dataset.sh + +echo "Testing dataset integrity..." + +# 1. Validate descriptor +if ! fairspec dataset validate dataset.json --silent; then + echo "✗ Dataset validation failed" + fairspec dataset validate dataset.json + exit 1 +fi + +# 2. Check all resources exist +for resource in $(fairspec dataset list dataset.json --json | jq -r '.[]'); do + echo "Checking resource: $resource" + if ! 
fairspec table describe --from-dataset dataset.json --from-resource "$resource" --silent; then + echo "✗ Resource $resource could not be loaded" + exit 1 + fi +done + +echo "✓ All tests passed" +``` + +### Interactive Data Exploration + +```bash +# Start interactive session +fairspec dataset script dataset.json + +# In REPL, explore the dataset: +``` + +```python +# List all resources +[r.name for r in dataset.resources] + +# Load a specific resource +users = fairspec.load_table(next(r for r in dataset.resources if r.name == "users")) + +# Query the data +active_users = users.filter(pl.col("active").eq(True)).collect() +print(active_users) + +# Check schema +print(dataset.resources[0].tableSchema) +``` + +## Working with Resources + +All dataset commands integrate with table commands through the `--from-dataset` and `--from-resource` options: + +```bash +# Load resource from dataset +fairspec table describe --from-dataset dataset.json --from-resource users + +# Query resource +fairspec table query --from-dataset dataset.json --from-resource orders \ + "SELECT * FROM self WHERE status = 'shipped'" + +# Validate resource +fairspec table validate --from-dataset dataset.json --from-resource products + +# Infer resource schema +fairspec table infer-schema --from-dataset dataset.json --from-resource users +``` + +This approach allows you to: +- Work with resources without specifying paths or formats +- Use embedded Table Schemas automatically +- Maintain consistency across your dataset +- Simplify command-line usage diff --git a/website/content/docs/terminal/file.md b/website/content/docs/terminal/file.md new file mode 100644 index 0000000..2412715 --- /dev/null +++ b/website/content/docs/terminal/file.md @@ -0,0 +1,244 @@ +--- +title: Working with Files in Terminal +sidebar: + order: 4 + label: File +--- + +File operations for copying, describing, validating, and analyzing local or remote files. 
+ +## Available Commands + +The `fairspec file` command provides utilities for working with files: + +- `copy` - Copy local or remote files +- `describe` - Get file statistics and metadata +- `validate` - Validate file integrity +- `infer-dialect` - Infer file dialect + +## Copy Files + +Copy files from local or remote sources to a local destination: + +```bash +# Copy a local file +fairspec file copy data.csv --to-path output.csv + +# Copy a remote file +fairspec file copy https://example.com/data.csv --to-path local-data.csv + +# Copy from a dataset resource +fairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csv +``` + +### Options + +- `--to-path ` (required) - Local output path +- `--from-dataset ` - Load file from dataset descriptor +- `--from-resource ` - Specify resource name from dataset +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +## Describe Files + +Get detailed information about a file including size, type, and checksums: + +```bash +# Describe a local file +fairspec file describe data.csv + +# Describe with specific hash type +fairspec file describe data.csv --hash-type sha256 + +# Describe a remote file +fairspec file describe https://example.com/data.csv + +# Describe from a dataset +fairspec file describe --from-dataset dataset.json --from-resource users +``` + +### Output + +The describe command returns: +- `bytes` - File size in bytes +- `textual` - Whether the file is text-based +- `integrity` - Hash value and type + +### Options + +- `--hash-type ` - Hash algorithm to use + - Choices: `md5`, `sha1`, `sha256` (default), `sha512` +- `--from-dataset ` - Load file from dataset descriptor +- `--from-resource ` - Specify resource name from dataset +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Example Output + +```json +{ + "bytes": 1024, + "textual": true, + "integrity": { + "type": 
"sha256",
+    "hash": "a1b2c3d4e5f6..."
+  }
+}
+```
+
+## Validate Files
+
+Validate file integrity using checksums:
+
+```bash
+# Validate with expected hash
+fairspec file validate data.csv --hash a1b2c3d4e5f6 --hash-type sha256
+
+# Validate using MD5
+fairspec file validate data.csv --hash 098f6bcd4621 --hash-type md5
+
+# Output as JSON for automation
+fairspec file validate data.csv --hash a1b2c3d4 --json
+```
+
+### Options
+
+- `--hash <hash>` - Expected file hash
+- `--hash-type <type>` - Hash algorithm to use (default: `md5`)
+  - Choices: `md5`, `sha1`, `sha256`, `sha512`
+- `--silent` - Suppress output messages
+- `--debug` - Show debug information
+- `--json` - Output as JSON
+
+### Validation Report
+
+Returns a validation report with:
+- `valid` - Boolean indicating if validation passed
+- `errors` - Array of validation errors (if any)
+
+Example error:
+```json
+{
+  "valid": false,
+  "errors": [
+    {
+      "type": "file/integrity",
+      "hashType": "sha256",
+      "expectedHash": "a1b2c3d4e5f6...",
+      "actualHash": "different..." 
+ } + ] +} +``` + +## Infer File Dialect + +Automatically detect the dialect of a file: + +```bash +# Infer dialect from file +fairspec file infer-dialect data.csv + +# Infer from remote file +fairspec file infer-dialect https://example.com/data.json + +# Output as JSON +fairspec file infer-dialect data.xlsx --json +``` + +### Options + +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +### Supported Formats + +The command can detect: +- CSV/TSV files +- JSON/JSONL files +- Excel files (.xlsx, .xls) +- OpenDocument Spreadsheet (.ods) +- Parquet files +- Arrow/Feather files +- SQLite databases + +## Working with Datasets + +All file commands support loading files from dataset descriptors: + +```bash +# Describe a resource from a dataset +fairspec file describe --from-dataset dataset.json --from-resource sales-data + +# Copy a resource from a dataset +fairspec file copy --from-dataset dataset.json --from-resource users --to-path users.csv + +# Validate a resource from a dataset +fairspec file validate --from-dataset dataset.json --from-resource products --hash abc123 +``` + +## Output Formats + +### Text Output (default) + +Human-readable output with colors and formatting: + +```bash +fairspec file describe data.csv +``` + +### JSON Output + +Machine-readable JSON for automation and scripting: + +```bash +fairspec file describe data.csv --json +``` + +### Silent Mode + +Suppress all output except errors: + +```bash +fairspec file copy data.csv --to-path output.csv --silent +``` + +## Examples + +### Copy and Validate + +```bash +# Copy a file and get its hash +fairspec file copy remote-data.csv --to-path local-data.csv +fairspec file describe local-data.csv --hash-type sha256 + +# Validate the copied file +fairspec file validate local-data.csv --hash --hash-type sha256 +``` + +### Process Dataset Resources + +```bash +# Describe all details of a dataset resource +fairspec file describe --from-dataset 
dataset.json --from-resource sales + +# Copy the resource locally +fairspec file copy --from-dataset dataset.json --from-resource sales --to-path sales.csv + +# Infer its dialect +fairspec file infer-dialect sales.csv +``` + +### Automation with JSON + +```bash +# Get file info as JSON for scripting +INFO=$(fairspec file describe data.csv --json) +HASH=$(echo $INFO | jq -r '.integrity.hash') + +# Use in validation +fairspec file validate data.csv --hash $HASH --hash-type sha256 +``` diff --git a/website/content/docs/terminal/table.md b/website/content/docs/terminal/table.md new file mode 100644 index 0000000..19cc55f --- /dev/null +++ b/website/content/docs/terminal/table.md @@ -0,0 +1,581 @@ +--- +title: Working with Tables in Terminal +sidebar: + order: 2 + label: Table +--- + +Table operations including querying, validation, statistics, and schema management for tabular data files. + +## Available Commands + +The `fairspec table` command provides utilities for working with tables: + +- `describe` - Get table statistics and summary information +- `query` - Query tables using SQL syntax +- `validate` - Validate table data against a Table Schema +- `infer-schema` - Automatically infer Table Schema from table data +- `render-schema` - Render Table Schema as HTML or Markdown documentation +- `validate-schema` - Validate a Table Schema file +- `infer-dialect` - Infer file dialect +- `script` - Interactive REPL session with loaded table + +## Describe Tables + +Get statistical summary information about a table: + +```bash +# Describe a CSV file +fairspec table describe data.csv + +# Describe a remote table +fairspec table describe https://example.com/data.csv + +# Describe from a dataset +fairspec table describe --from-dataset dataset.json --from-resource sales + +# Output as JSON +fairspec table describe data.csv --json +``` + +### Output + +Returns statistics for each column including: +- `count` - Number of non-null values +- `null_count` - Number of null values +- 
`mean` - Average value (numeric columns)
+- `std` - Standard deviation (numeric columns)
+- `min` - Minimum value
+- `max` - Maximum value
+- `median` - Median value (numeric columns)
+
+### Options
+
+- `--from-dataset <path>` - Load table from dataset descriptor
+- `--from-resource <name>` - Specify resource name from dataset
+- `--debug` - Show debug information
+- `--json` - Output as JSON
+
+### Format Options
+
+All standard format options are available (see Format Options section below).
+
+## Query Tables
+
+Execute SQL queries on tables using Polars SQL engine:
+
+```bash
+# Basic query
+fairspec table query data.csv "SELECT * FROM self WHERE age > 25"
+
+# Aggregate data
+fairspec table query sales.csv "SELECT region, SUM(amount) as total FROM self GROUP BY region"
+
+# Filter and sort
+fairspec table query users.csv "SELECT name, email FROM self WHERE active = true ORDER BY name"
+
+# Query from dataset resource
+fairspec table query --from-dataset dataset.json --from-resource users \
+  "SELECT * FROM self WHERE created_at > '2024-01-01'"
+```
+
+### SQL Syntax
+
+- Use `self` as the table name in queries
+- Supports SELECT, WHERE, GROUP BY, ORDER BY, LIMIT, JOIN, etc.
+- Full Polars SQL syntax supported +- Results are output as formatted tables + +### Options + +- `--from-dataset ` - Load table from dataset descriptor +- `--from-resource ` - Specify resource name from dataset +- `--debug` - Show debug information +- `--json` - Output as JSON + +## Validate Tables + +Validate table data against a Table Schema: + +```bash +# Validate with explicit schema +fairspec table validate data.csv --table-schema schema.json + +# Validate with inferred schema +fairspec table validate data.csv + +# Validate from dataset (uses embedded schema) +fairspec table validate --from-dataset dataset.json --from-resource users + +# Output validation report as JSON +fairspec table validate data.csv --table-schema schema.json --json +``` + +### Validation Report + +Returns a validation report with: +- `valid` - Boolean indicating if validation passed +- `errors` - Array of validation errors (if any) + +Example validation errors: +```json +{ + "valid": false, + "errors": [ + { + "type": "table/constraint", + "propertyName": "age", + "rowNumber": 5, + "message": "value 200 exceeds maximum of 150" + }, + { + "type": "table/type", + "propertyName": "email", + "rowNumber": 12, + "message": "invalid email format" + } + ] +} +``` + +### Options + +- `--table-schema ` - Path to Table Schema file +- `--from-dataset ` - Load table from dataset descriptor +- `--from-resource ` - Specify resource name from dataset +- `--debug` - Show debug information +- `--json` - Output as JSON + +## Infer Table Schema + +Automatically generate a Table Schema from table data: + +```bash +# Infer schema from local file +fairspec table infer-schema data.csv + +# Infer from remote file +fairspec table infer-schema https://example.com/data.csv + +# Save inferred schema to file +fairspec table infer-schema data.csv --json > schema.json + +# Infer with custom options +fairspec table infer-schema data.csv --sample-rows 1000 --confidence 0.95 +``` + +### Schema Inference Options + +- 
`--sample-rows <number>` - Number of rows to sample for inference (default: 100)
+- `--confidence <number>` - Confidence threshold for type detection (0-1, default: 0.9)
+- `--keep-strings` - Keep original string types instead of inferring
+- `--column-types <json>` - Override types for specific columns
+- `--comma-decimal` - Treat comma as decimal separator
+- `--month-first` - Parse dates as month-first (MM/DD/YYYY)
+
+### Generated Schema
+
+The inferred schema automatically detects:
+- Column types (string, integer, number, boolean, date, datetime, etc.)
+- Required columns based on presence
+- Enum values for columns with limited distinct values
+- Numeric constraints (minimum, maximum)
+- String patterns
+- Missing value indicators
+
+### Example
+
+Given this CSV data:
+```csv
+id,name,price,quantity,active,created_at
+1,Product A,19.99,100,true,2024-01-15
+2,Product B,29.99,50,false,2024-01-20
+3,Product C,39.99,75,true,2024-02-01
+```
+
+Infer the schema:
+```bash
+fairspec table infer-schema products.csv --json
+```
+
+Generated schema:
+```json
+{
+  "properties": {
+    "id": { "type": "integer" },
+    "name": { "type": "string" },
+    "price": { "type": "number" },
+    "quantity": { "type": "integer" },
+    "active": { "type": "boolean" },
+    "created_at": { "type": "date" }
+  },
+  "required": ["id", "name", "price", "quantity", "active", "created_at"]
+}
+```
+
+## Render Table Schema
+
+Render a Table Schema as human-readable HTML or Markdown documentation:
+
+```bash
+# Render as Markdown
+fairspec table render-schema schema.json --to-format markdown
+
+# Render as HTML
+fairspec table render-schema schema.json --to-format html
+
+# Save to file
+fairspec table render-schema schema.json --to-format markdown --to-path schema.md
+fairspec table render-schema schema.json --to-format html --to-path schema.html
+```
+
+### Output Formats
+
+- `markdown` - Generates Markdown documentation with column descriptions, types, and constraints
+- `html` - Generates styled HTML table documentation
+ +### Options + +- `--to-format ` (required) - Output format (markdown or html) +- `--to-path ` - Save to file instead of stdout +- `--silent` - Suppress output messages +- `--debug` - Show debug information + +## Validate Table Schema + +Validate that a Table Schema file is valid: + +```bash +# Validate a schema file +fairspec table validate-schema schema.json + +# Validate from remote source +fairspec table validate-schema https://example.com/schema.json + +# Output as JSON +fairspec table validate-schema schema.json --json +``` + +### Schema Validation + +This validates that the schema itself is: +- Valid JSON +- Compliant with Table Schema specification +- Has correct property definitions +- Uses valid column types and constraints + +### Validation Report + +```json +{ + "valid": true, + "errors": [] +} +``` + +Or if invalid: +```json +{ + "valid": false, + "errors": [ + { + "type": "schema/invalid", + "message": "Invalid column type: 'txt' (did you mean 'text'?)" + } + ] +} +``` + +### Options + +- `--silent` - Suppress output messages +- `--debug` - Show debug information +- `--json` - Output as JSON + +## Infer File Dialect + +Automatically detect the dialect of a table file: + +```bash +# Infer dialect from file +fairspec table infer-dialect data.csv + +# Infer from remote file +fairspec table infer-dialect https://example.com/data.xlsx + +# Output as JSON +fairspec table infer-dialect data.parquet --json +``` + +### Detected Formats + +The command can detect: +- `csv` - Comma-separated values +- `tsv` - Tab-separated values +- `json` - JSON format +- `jsonl` - JSON Lines (newline-delimited JSON) +- `xlsx` - Excel spreadsheet +- `ods` - OpenDocument Spreadsheet +- `parquet` - Apache Parquet +- `arrow` - Apache Arrow/Feather +- `sqlite` - SQLite database + +### Example Output + +```json +{ + "name": "csv", + "delimiter": ",", + "quoteChar": "\"" +} +``` + +## Interactive Scripting + +Start an interactive REPL session with a loaded table: + +```bash +# Load 
table and start REPL +fairspec table script data.csv + +# Script table from dataset +fairspec table script --from-dataset dataset.json --from-resource users +``` + +### Available in Session + +- `fairspec` - Full fairspec library +- `table` - Loaded table (LazyFrame) + +### Example Session + +```python +fairspec> table +LazyFrame { ... } + +fairspec> table.collect() +DataFrame { ... } + +fairspec> table.select(["name", "age"]).collect() +DataFrame { ... } + +fairspec> table.filter(pl.col("age").gt(25)).collect() +DataFrame { ... } +``` + +## Format Options + +All table commands support these format options for loading data: + +### CSV/TSV Options + +- `--format ` - Format name (csv, tsv, etc.) +- `--delimiter ` - Column delimiter (default: `,`) +- `--line-terminator ` - Row terminator (default: `\n`) +- `--quote-char ` - Quote character (default: `"`) +- `--null-sequence ` - Null value indicator +- `--header-rows ` - Header row indices (e.g., `[1,2]`) +- `--header-join ` - Character to join multi-row headers +- `--comment-rows ` - Comment row indices to skip +- `--comment-prefix ` - Comment line prefix (e.g., `#`) +- `--column-names ` - Override column names (JSON array) + +### JSON Options + +- `--json-pointer ` - JSON pointer to data array (e.g., `/data/users`) +- `--row-type ` - Row format: `object` or `array` + +### Excel/ODS Options + +- `--sheet-number ` - Sheet index (0-based) +- `--sheet-name ` - Sheet name + +### SQLite Options + +- `--table-name ` - Table name in database + +## Table Schema Options + +All table commands support these schema-related options: + +### Type Inference + +- `--sample-rows ` - Sample size for type inference +- `--confidence ` - Confidence threshold (0-1) +- `--keep-strings` - Don't infer types, keep as strings +- `--column-types ` - Override types (e.g., `{"age":"integer"}`) + +### Value Parsing + +- `--missing-values ` - Missing value indicators (JSON array) +- `--decimal-char ` - Decimal separator (default: `.`) +- 
`--group-char ` - Thousands separator (default: `,`) +- `--comma-decimal` - Use comma as decimal (shorthand) +- `--true-values ` - Custom true values (JSON array) +- `--false-values ` - Custom false values (JSON array) + +### Date/Time Parsing + +- `--datetime-format ` - Datetime format string +- `--date-format ` - Date format string +- `--time-format ` - Time format string +- `--month-first` - Parse dates as month-first + +### Array/List Parsing + +- `--array-type ` - Array item type +- `--list-delimiter ` - List delimiter (default: `;`) +- `--list-item-type ` - List item type + +## Common Workflows + +### Explore Unknown Data + +```bash +# 1. Infer the dialect +fairspec table infer-dialect unknown-data.txt + +# 2. Get basic statistics +fairspec table describe unknown-data.txt + +# 3. Infer the schema +fairspec table infer-schema unknown-data.txt --json > schema.json + +# 4. Query the data +fairspec table query unknown-data.txt "SELECT * FROM self LIMIT 10" +``` + +### Schema-Driven Validation + +```bash +# 1. Create schema from sample data +fairspec table infer-schema sample.csv --json > schema.json + +# 2. Validate the schema itself +fairspec table validate-schema schema.json + +# 3. Generate documentation +fairspec table render-schema schema.json --to-format markdown --to-path docs.md + +# 4. 
Validate production data +fairspec table validate production.csv --table-schema schema.json +``` + +### Data Quality Checks + +```bash +# Check for data quality issues +fairspec table validate data.csv --table-schema schema.json + +# Get detailed statistics +fairspec table describe data.csv + +# Query for specific issues +fairspec table query data.csv "SELECT * FROM self WHERE email NOT LIKE '%@%'" + +# Find duplicates +fairspec table query data.csv "SELECT id, COUNT(*) as cnt FROM self GROUP BY id HAVING cnt > 1" +``` + +### Interactive Analysis + +```bash +# Start interactive session +fairspec table script data.csv + +# In REPL: +# - Explore: table.head(10).collect() +# - Filter: table.filter(pl.col("status").eq("active")).collect() +# - Aggregate: table.group_by("category").agg(pl.sum("amount")).collect() +# - Transform: table.with_columns(pl.col("price").mul(1.1).alias("new_price")).collect() +``` + +### Format Conversion + +```bash +# Query and output as JSON +fairspec table query data.csv "SELECT * FROM self" --json > output.json + +# Get statistics and save +fairspec table describe large-file.parquet --json > stats.json +``` + +## Output Formats + +### Text Output (default) + +Human-readable output with formatted tables: + +```bash +fairspec table describe data.csv +``` + +Output: +``` +# count mean std min max +id 100 50.5 29.01 1 100 +price 100 29.99 15.43 9.99 99.99 +quantity 100 75 28.87 1 150 +``` + +### JSON Output + +Machine-readable JSON for automation: + +```bash +fairspec table describe data.csv --json +``` + +## Examples + +### CSV Data Analysis + +```bash +# Get overview of sales data +fairspec table describe sales.csv + +# Find top customers +fairspec table query sales.csv \ + "SELECT customer, SUM(amount) as total FROM self GROUP BY customer ORDER BY total DESC LIMIT 10" + +# Validate data quality +fairspec table validate sales.csv --table-schema sales-schema.json +``` + +### Multi-Format Pipeline + +```bash +# Load Excel data +fairspec table 
describe report.xlsx --sheet-name "Q1 Sales" + +# Query specific sheet +fairspec table query report.xlsx --sheet-name "Q1 Sales" \ + "SELECT region, SUM(revenue) FROM self GROUP BY region" + +# Validate against schema +fairspec table validate report.xlsx --sheet-name "Q1 Sales" --table-schema schema.json +``` + +### Remote Data Validation + +```bash +# Infer schema from remote data +fairspec table infer-schema https://api.example.com/export.csv --json > remote-schema.json + +# Validate local data against remote schema +fairspec table validate local-data.csv --table-schema remote-schema.json +``` + +### Database Export Validation + +```bash +# Validate SQLite export +fairspec table validate export.db --table-name users --table-schema expected-schema.json + +# Get statistics from database +fairspec table describe export.db --table-name users + +# Query database table +fairspec table query export.db --table-name users \ + "SELECT status, COUNT(*) FROM self GROUP BY status" +``` diff --git a/website/package.json b/website/package.json new file mode 100644 index 0000000..d24b8c5 --- /dev/null +++ b/website/package.json @@ -0,0 +1,24 @@ +{ + "name": "@fairspec/website", + "type": "module", + "version": "0.0.0-dev", + "private": true, + "title": "Fairspec Python", + "description": "Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. 
It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub", + "repository": "https://github.com/fairspec/fairspec-python", + "homepage": "https://python.fairspec.org", + "scripts": { + "build": "astro build", + "preview": "astro preview --port 5000", + "start": "astro dev --port 5000", + "type": "echo 'enable tsc --noEmit'" + }, + "devDependencies": { + "@astrojs/starlight": "0.36.0", + "astro": "5.14.1", + "sharp": "0.34.2", + "starlight-changelogs": "0.1.1", + "starlight-github-alerts": "0.1.0", + "starlight-scroll-to-top": "0.3.1" + } +} diff --git a/website/pnpm-lock.yaml b/website/pnpm-lock.yaml new file mode 100644 index 0000000..469e470 --- /dev/null +++ b/website/pnpm-lock.yaml @@ -0,0 +1,4222 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + devDependencies: + '@astrojs/starlight': + specifier: 0.36.0 + version: 0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + astro: + specifier: 5.14.1 + version: 5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3) + sharp: + specifier: 0.34.2 + version: 0.34.2 + starlight-changelogs: + specifier: 0.1.1 + version: 0.1.1(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)))(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + starlight-github-alerts: + specifier: 0.1.0 + version: 0.1.0(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))) + starlight-scroll-to-top: + specifier: 0.3.1 + version: 0.3.1(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))) + +packages: + + '@ascorbic/loader-utils@1.0.2': + resolution: {integrity: sha512-pg43g83gojVtEsAkXfjWuzJhuXneJp4wM/leBftGkCPV3yxKgB92EWA+nWu735BgbBMph3P7DrVqVc3ikt+dJA==} + peerDependencies: + astro: ^4.14.0 || ^5.0.0-beta.0 + + '@astrojs/compiler@2.13.1': + resolution: 
{integrity: sha512-f3FN83d2G/v32ipNClRKgYv30onQlMZX1vCeZMjPsMMPl1mDpmbl0+N5BYo4S/ofzqJyS5hvwacEo0CCVDn/Qg==} + + '@astrojs/internal-helpers@0.7.3': + resolution: {integrity: sha512-6Pl0bQEIChuW5wqN7jdKrzWfCscW2rG/Cz+fzt4PhSQX2ivBpnhXgFUCs0M3DCYvjYHnPVG2W36X5rmFjZ62sw==} + + '@astrojs/internal-helpers@0.7.5': + resolution: {integrity: sha512-vreGnYSSKhAjFJCWAwe/CNhONvoc5lokxtRoZims+0wa3KbHBdPHSSthJsKxPd8d/aic6lWKpRTYGY/hsgK6EA==} + + '@astrojs/markdown-remark@6.3.10': + resolution: {integrity: sha512-kk4HeYR6AcnzC4QV8iSlOfh+N8TZ3MEStxPyenyCtemqn8IpEATBFMTJcfrNW32dgpt6MY3oCkMM/Tv3/I4G3A==} + + '@astrojs/markdown-remark@6.3.7': + resolution: {integrity: sha512-KXGdq6/BC18doBCYXp08alHlWChH0hdD2B1qv9wIyOHbvwI5K6I7FhSta8dq1hBQNdun8YkKPR013D/Hm8xd0g==} + + '@astrojs/mdx@4.3.13': + resolution: {integrity: sha512-IHDHVKz0JfKBy3//52JSiyWv089b7GVSChIXLrlUOoTLWowG3wr2/8hkaEgEyd/vysvNQvGk+QhysXpJW5ve6Q==} + engines: {node: 18.20.8 || ^20.3.0 || >=22.0.0} + peerDependencies: + astro: ^5.0.0 + + '@astrojs/prism@3.3.0': + resolution: {integrity: sha512-q8VwfU/fDZNoDOf+r7jUnMC2//H2l0TuQ6FkGJL8vD8nw/q5KiL3DS1KKBI3QhI9UQhpJ5dc7AtqfbXWuOgLCQ==} + engines: {node: 18.20.8 || ^20.3.0 || >=22.0.0} + + '@astrojs/sitemap@3.7.0': + resolution: {integrity: sha512-+qxjUrz6Jcgh+D5VE1gKUJTA3pSthuPHe6Ao5JCxok794Lewx8hBFaWHtOnN0ntb2lfOf7gvOi9TefUswQ/ZVA==} + + '@astrojs/starlight@0.36.0': + resolution: {integrity: sha512-aVJVBfvFuE2avsMDhmRzn6I5GjDhUwIQFlu3qH9a1C0fNsPYDw2asxHQODAD7EfGiKGvvHCJgHb+9jbJ8lCfNQ==} + peerDependencies: + astro: ^5.5.0 + + '@astrojs/telemetry@3.3.0': + resolution: {integrity: sha512-UFBgfeldP06qu6khs/yY+q1cDAaArM2/7AEIqQ9Cuvf7B1hNLq0xDrZkct+QoIGyjq56y8IaE2I3CTvG99mlhQ==} + engines: {node: 18.20.8 || ^20.3.0 || >=22.0.0} + + '@babel/helper-string-parser@7.27.1': + resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==} + engines: {node: '>=6.9.0'} + + '@babel/helper-validator-identifier@7.28.5': + resolution: 
{integrity: sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==} + engines: {node: '>=6.9.0'} + + '@babel/parser@7.29.0': + resolution: {integrity: sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==} + engines: {node: '>=6.0.0'} + hasBin: true + + '@babel/runtime@7.28.6': + resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} + engines: {node: '>=6.9.0'} + + '@babel/types@7.29.0': + resolution: {integrity: sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==} + engines: {node: '>=6.9.0'} + + '@capsizecss/unpack@2.4.0': + resolution: {integrity: sha512-GrSU71meACqcmIUxPYOJvGKF0yryjN/L1aCuE9DViCTJI7bfkjgYDPD1zbNDcINJwSSP6UaBZY9GAbYDO7re0Q==} + + '@ctrl/tinycolor@4.2.0': + resolution: {integrity: sha512-kzyuwOAQnXJNLS9PSyrk0CWk35nWJW/zl/6KvnTBMFK65gm7U1/Z5BqjxeapjZCIhQcM/DsrEmcbRwDyXyXK4A==} + engines: {node: '>=14'} + + '@emnapi/runtime@1.8.1': + resolution: {integrity: sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==} + + '@esbuild/aix-ppc64@0.25.12': + resolution: {integrity: sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.25.12': + resolution: {integrity: sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.25.12': + resolution: {integrity: sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.25.12': + resolution: {integrity: sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==} + engines: {node: 
'>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.25.12': + resolution: {integrity: sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.25.12': + resolution: {integrity: sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.25.12': + resolution: {integrity: sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.25.12': + resolution: {integrity: sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.25.12': + resolution: {integrity: sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.25.12': + resolution: {integrity: sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.25.12': + resolution: {integrity: sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.25.12': + resolution: {integrity: sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.25.12': + resolution: {integrity: sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + 
'@esbuild/linux-ppc64@0.25.12': + resolution: {integrity: sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.25.12': + resolution: {integrity: sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.25.12': + resolution: {integrity: sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.25.12': + resolution: {integrity: sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.25.12': + resolution: {integrity: sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.25.12': + resolution: {integrity: sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.25.12': + resolution: {integrity: sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.25.12': + resolution: {integrity: sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/openharmony-arm64@0.25.12': + resolution: {integrity: sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + + '@esbuild/sunos-x64@0.25.12': + resolution: {integrity: 
sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.25.12': + resolution: {integrity: sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.25.12': + resolution: {integrity: sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.25.12': + resolution: {integrity: sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@expressive-code/core@0.41.6': + resolution: {integrity: sha512-FvJQP+hG0jWi/FLBSmvHInDqWR7jNANp9PUDjdMqSshHb0y7sxx3vHuoOr6SgXjWw+MGLqorZyPQ0aAlHEok6g==} + + '@expressive-code/plugin-frames@0.41.6': + resolution: {integrity: sha512-d+hkSYXIQot6fmYnOmWAM+7TNWRv/dhfjMsNq+mIZz8Tb4mPHOcgcfZeEM5dV9TDL0ioQNvtcqQNuzA1sRPjxg==} + + '@expressive-code/plugin-shiki@0.41.6': + resolution: {integrity: sha512-Y6zmKBmsIUtWTzdefqlzm/h9Zz0Rc4gNdt2GTIH7fhHH2I9+lDYCa27BDwuBhjqcos6uK81Aca9dLUC4wzN+ng==} + + '@expressive-code/plugin-text-markers@0.41.6': + resolution: {integrity: sha512-PBFa1wGyYzRExMDzBmAWC6/kdfG1oLn4pLpBeTfIRrALPjcGA/59HP3e7q9J0Smk4pC7U+lWkA2LHR8FYV8U7Q==} + + '@img/sharp-darwin-arm64@0.34.2': + resolution: {integrity: sha512-OfXHZPppddivUJnqyKoi5YVeHRkkNE2zUFT2gbpKxp/JZCFYEYubnMg+gOp6lWfasPrTS+KPosKqdI+ELYVDtg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-x64@0.34.2': + resolution: {integrity: sha512-dYvWqmjU9VxqXmjEtjmvHnGqF8GrVjM2Epj9rJ6BUIXvk8slvNDJbhGFvIoXzkDhrJC2jUxNLz/GUjjvSzfw+g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.1.0': + resolution: 
{integrity: sha512-HZ/JUmPwrJSoM4DIQPv/BfNh9yrOA8tlBbqbLz4JZ5uew2+o22Ik+tHQJcih7QJuSa0zo5coHTfD5J8inqj9DA==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.1.0': + resolution: {integrity: sha512-Xzc2ToEmHN+hfvsl9wja0RlnXEgpKNmftriQp6XzY/RaSfwD9th+MSh0WQKzUreLKKINb3afirxW7A0fz2YWuQ==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-linux-arm64@1.1.0': + resolution: {integrity: sha512-IVfGJa7gjChDET1dK9SekxFFdflarnUB8PwW8aGwEoF3oAsSDuNUTYS+SKDOyOJxQyDC1aPFMuRYLoDInyV9Ew==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linux-arm@1.1.0': + resolution: {integrity: sha512-s8BAd0lwUIvYCJyRdFqvsj+BJIpDBSxs6ivrOPm/R7piTs5UIwY5OjXrP2bqXC9/moGsyRa37eYWYCOGVXxVrA==} + cpu: [arm] + os: [linux] + + '@img/sharp-libvips-linux-ppc64@1.1.0': + resolution: {integrity: sha512-tiXxFZFbhnkWE2LA8oQj7KYR+bWBkiV2nilRldT7bqoEZ4HiDOcePr9wVDAZPi/Id5fT1oY9iGnDq20cwUz8lQ==} + cpu: [ppc64] + os: [linux] + + '@img/sharp-libvips-linux-s390x@1.1.0': + resolution: {integrity: sha512-xukSwvhguw7COyzvmjydRb3x/09+21HykyapcZchiCUkTThEQEOMtBj9UhkaBRLuBrgLFzQ2wbxdeCCJW/jgJA==} + cpu: [s390x] + os: [linux] + + '@img/sharp-libvips-linux-x64@1.1.0': + resolution: {integrity: sha512-yRj2+reB8iMg9W5sULM3S74jVS7zqSzHG3Ol/twnAAkAhnGQnpjj6e4ayUz7V+FpKypwgs82xbRdYtchTTUB+Q==} + cpu: [x64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-arm64@1.1.0': + resolution: {integrity: sha512-jYZdG+whg0MDK+q2COKbYidaqW/WTz0cc1E+tMAusiDygrM4ypmSCjOJPmFTvHHJ8j/6cAGyeDWZOsK06tP33w==} + cpu: [arm64] + os: [linux] + + '@img/sharp-libvips-linuxmusl-x64@1.1.0': + resolution: {integrity: sha512-wK7SBdwrAiycjXdkPnGCPLjYb9lD4l6Ze2gSdAGVZrEL05AOUJESWU2lhlC+Ffn5/G+VKuSm6zzbQSzFX/P65A==} + cpu: [x64] + os: [linux] + + '@img/sharp-linux-arm64@0.34.2': + resolution: {integrity: sha512-D8n8wgWmPDakc83LORcfJepdOSN6MvWNzzz2ux0MnIbOqdieRZwVYY32zxVx+IFUT8er5KPcyU3XXsn+GzG/0Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linux-arm@0.34.2': + resolution: 
{integrity: sha512-0DZzkvuEOqQUP9mo2kjjKNok5AmnOr1jB2XYjkaoNRwpAYMDzRmAqUIa1nRi58S2WswqSfPOWLNOr0FDT3H5RQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + + '@img/sharp-linux-s390x@0.34.2': + resolution: {integrity: sha512-EGZ1xwhBI7dNISwxjChqBGELCWMGDvmxZXKjQRuqMrakhO8QoMgqCrdjnAqJq/CScxfRn+Bb7suXBElKQpPDiw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [s390x] + os: [linux] + + '@img/sharp-linux-x64@0.34.2': + resolution: {integrity: sha512-sD7J+h5nFLMMmOXYH4DD9UtSNBD05tWSSdWAcEyzqW8Cn5UxXvsHAxmxSesYUsTOBmUnjtxghKDl15EvfqLFbQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-linuxmusl-arm64@0.34.2': + resolution: {integrity: sha512-NEE2vQ6wcxYav1/A22OOxoSOGiKnNmDzCYFOZ949xFmrWZOVII1Bp3NqVVpvj+3UeHMFyN5eP/V5hzViQ5CZNA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + + '@img/sharp-linuxmusl-x64@0.34.2': + resolution: {integrity: sha512-DOYMrDm5E6/8bm/yQLCWyuDJwUnlevR8xtF8bs+gjZ7cyUNYXiSf/E8Kp0Ss5xasIaXSHzb888V1BE4i1hFhAA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + + '@img/sharp-wasm32@0.34.2': + resolution: {integrity: sha512-/VI4mdlJ9zkaq53MbIG6rZY+QRN3MLbR6usYlgITEzi4Rpx5S6LFKsycOQjkOGmqTNmkIdLjEvooFKwww6OpdQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [wasm32] + + '@img/sharp-win32-arm64@0.34.2': + resolution: {integrity: sha512-cfP/r9FdS63VA5k0xiqaNaEoGxBg9k7uE+RQGzuK9fHt7jib4zAVVseR9LsE4gJcNWgT6APKMNnCcnyOtmSEUQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [win32] + + '@img/sharp-win32-ia32@0.34.2': + resolution: {integrity: sha512-QLjGGvAbj0X/FXl8n1WbtQ6iVBpWU7JO94u/P2M4a8CFYsvQi4GW2mRy/JqkRx0qpBzaOdKJKw8uc930EX2AHw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [ia32] + os: [win32] + + '@img/sharp-win32-x64@0.34.2': + resolution: {integrity: 
sha512-aUdT6zEYtDKCaxkofmmJDJYGCf0+pJg3eU9/oBuqvEeoB9dKI6ZLc/1iLJCTuJQDO4ptntAlkUmHgGjyuobZbw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + '@jridgewell/sourcemap-codec@1.5.5': + resolution: {integrity: sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==} + + '@mdx-js/mdx@3.1.1': + resolution: {integrity: sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ==} + + '@oslojs/encoding@1.1.0': + resolution: {integrity: sha512-70wQhgYmndg4GCPxPPxPGevRKqTIJ2Nh4OkiMWmDAVYsTQ+Ta7Sq+rPevXyXGdzr30/qZBnyOalCszoMxlyldQ==} + + '@pagefind/darwin-arm64@1.4.0': + resolution: {integrity: sha512-2vMqkbv3lbx1Awea90gTaBsvpzgRs7MuSgKDxW0m9oV1GPZCZbZBJg/qL83GIUEN2BFlY46dtUZi54pwH+/pTQ==} + cpu: [arm64] + os: [darwin] + + '@pagefind/darwin-x64@1.4.0': + resolution: {integrity: sha512-e7JPIS6L9/cJfow+/IAqknsGqEPjJnVXGjpGm25bnq+NPdoD3c/7fAwr1OXkG4Ocjx6ZGSCijXEV4ryMcH2E3A==} + cpu: [x64] + os: [darwin] + + '@pagefind/default-ui@1.4.0': + resolution: {integrity: sha512-wie82VWn3cnGEdIjh4YwNESyS1G6vRHwL6cNjy9CFgNnWW/PGRjsLq300xjVH5sfPFK3iK36UxvIBymtQIEiSQ==} + + '@pagefind/freebsd-x64@1.4.0': + resolution: {integrity: sha512-WcJVypXSZ+9HpiqZjFXMUobfFfZZ6NzIYtkhQ9eOhZrQpeY5uQFqNWLCk7w9RkMUwBv1HAMDW3YJQl/8OqsV0Q==} + cpu: [x64] + os: [freebsd] + + '@pagefind/linux-arm64@1.4.0': + resolution: {integrity: sha512-PIt8dkqt4W06KGmQjONw7EZbhDF+uXI7i0XtRLN1vjCUxM9vGPdtJc2mUyVPevjomrGz5M86M8bqTr6cgDp1Uw==} + cpu: [arm64] + os: [linux] + + '@pagefind/linux-x64@1.4.0': + resolution: {integrity: sha512-z4oddcWwQ0UHrTHR8psLnVlz6USGJ/eOlDPTDYZ4cI8TK8PgwRUPQZp9D2iJPNIPcS6Qx/E4TebjuGJOyK8Mmg==} + cpu: [x64] + os: [linux] + + '@pagefind/windows-x64@1.4.0': + resolution: {integrity: sha512-NkT+YAdgS2FPCn8mIA9bQhiBs+xmniMGq1LFPDhcFn0+2yIUEiIG06t7bsZlhdjknEQRTSdT7YitP6fC5qwP0g==} + cpu: [x64] + os: [win32] + + '@rollup/pluginutils@5.3.0': + resolution: {integrity: 
sha512-5EdhGZtnu3V88ces7s53hhfK5KSASnJZv8Lulpc04cWO3REESroJXg73DFsOmgbU2BhwV0E20bu2IDZb3VKW4Q==} + engines: {node: '>=14.0.0'} + peerDependencies: + rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0 + peerDependenciesMeta: + rollup: + optional: true + + '@rollup/rollup-android-arm-eabi@4.57.1': + resolution: {integrity: sha512-A6ehUVSiSaaliTxai040ZpZ2zTevHYbvu/lDoeAteHI8QnaosIzm4qwtezfRg1jOYaUmnzLX1AOD6Z+UJjtifg==} + cpu: [arm] + os: [android] + + '@rollup/rollup-android-arm64@4.57.1': + resolution: {integrity: sha512-dQaAddCY9YgkFHZcFNS/606Exo8vcLHwArFZ7vxXq4rigo2bb494/xKMMwRRQW6ug7Js6yXmBZhSBRuBvCCQ3w==} + cpu: [arm64] + os: [android] + + '@rollup/rollup-darwin-arm64@4.57.1': + resolution: {integrity: sha512-crNPrwJOrRxagUYeMn/DZwqN88SDmwaJ8Cvi/TN1HnWBU7GwknckyosC2gd0IqYRsHDEnXf328o9/HC6OkPgOg==} + cpu: [arm64] + os: [darwin] + + '@rollup/rollup-darwin-x64@4.57.1': + resolution: {integrity: sha512-Ji8g8ChVbKrhFtig5QBV7iMaJrGtpHelkB3lsaKzadFBe58gmjfGXAOfI5FV0lYMH8wiqsxKQ1C9B0YTRXVy4w==} + cpu: [x64] + os: [darwin] + + '@rollup/rollup-freebsd-arm64@4.57.1': + resolution: {integrity: sha512-R+/WwhsjmwodAcz65guCGFRkMb4gKWTcIeLy60JJQbXrJ97BOXHxnkPFrP+YwFlaS0m+uWJTstrUA9o+UchFug==} + cpu: [arm64] + os: [freebsd] + + '@rollup/rollup-freebsd-x64@4.57.1': + resolution: {integrity: sha512-IEQTCHeiTOnAUC3IDQdzRAGj3jOAYNr9kBguI7MQAAZK3caezRrg0GxAb6Hchg4lxdZEI5Oq3iov/w/hnFWY9Q==} + cpu: [x64] + os: [freebsd] + + '@rollup/rollup-linux-arm-gnueabihf@4.57.1': + resolution: {integrity: sha512-F8sWbhZ7tyuEfsmOxwc2giKDQzN3+kuBLPwwZGyVkLlKGdV1nvnNwYD0fKQ8+XS6hp9nY7B+ZeK01EBUE7aHaw==} + cpu: [arm] + os: [linux] + + '@rollup/rollup-linux-arm-musleabihf@4.57.1': + resolution: {integrity: sha512-rGfNUfn0GIeXtBP1wL5MnzSj98+PZe/AXaGBCRmT0ts80lU5CATYGxXukeTX39XBKsxzFpEeK+Mrp9faXOlmrw==} + cpu: [arm] + os: [linux] + + '@rollup/rollup-linux-arm64-gnu@4.57.1': + resolution: {integrity: sha512-MMtej3YHWeg/0klK2Qodf3yrNzz6CGjo2UntLvk2RSPlhzgLvYEB3frRvbEF2wRKh1Z2fDIg9KRPe1fawv7C+g==} + cpu: [arm64] + os: 
[linux] + + '@rollup/rollup-linux-arm64-musl@4.57.1': + resolution: {integrity: sha512-1a/qhaaOXhqXGpMFMET9VqwZakkljWHLmZOX48R0I/YLbhdxr1m4gtG1Hq7++VhVUmf+L3sTAf9op4JlhQ5u1Q==} + cpu: [arm64] + os: [linux] + + '@rollup/rollup-linux-loong64-gnu@4.57.1': + resolution: {integrity: sha512-QWO6RQTZ/cqYtJMtxhkRkidoNGXc7ERPbZN7dVW5SdURuLeVU7lwKMpo18XdcmpWYd0qsP1bwKPf7DNSUinhvA==} + cpu: [loong64] + os: [linux] + + '@rollup/rollup-linux-loong64-musl@4.57.1': + resolution: {integrity: sha512-xpObYIf+8gprgWaPP32xiN5RVTi/s5FCR+XMXSKmhfoJjrpRAjCuuqQXyxUa/eJTdAE6eJ+KDKaoEqjZQxh3Gw==} + cpu: [loong64] + os: [linux] + + '@rollup/rollup-linux-ppc64-gnu@4.57.1': + resolution: {integrity: sha512-4BrCgrpZo4hvzMDKRqEaW1zeecScDCR+2nZ86ATLhAoJ5FQ+lbHVD3ttKe74/c7tNT9c6F2viwB3ufwp01Oh2w==} + cpu: [ppc64] + os: [linux] + + '@rollup/rollup-linux-ppc64-musl@4.57.1': + resolution: {integrity: sha512-NOlUuzesGauESAyEYFSe3QTUguL+lvrN1HtwEEsU2rOwdUDeTMJdO5dUYl/2hKf9jWydJrO9OL/XSSf65R5+Xw==} + cpu: [ppc64] + os: [linux] + + '@rollup/rollup-linux-riscv64-gnu@4.57.1': + resolution: {integrity: sha512-ptA88htVp0AwUUqhVghwDIKlvJMD/fmL/wrQj99PRHFRAG6Z5nbWoWG4o81Nt9FT+IuqUQi+L31ZKAFeJ5Is+A==} + cpu: [riscv64] + os: [linux] + + '@rollup/rollup-linux-riscv64-musl@4.57.1': + resolution: {integrity: sha512-S51t7aMMTNdmAMPpBg7OOsTdn4tySRQvklmL3RpDRyknk87+Sp3xaumlatU+ppQ+5raY7sSTcC2beGgvhENfuw==} + cpu: [riscv64] + os: [linux] + + '@rollup/rollup-linux-s390x-gnu@4.57.1': + resolution: {integrity: sha512-Bl00OFnVFkL82FHbEqy3k5CUCKH6OEJL54KCyx2oqsmZnFTR8IoNqBF+mjQVcRCT5sB6yOvK8A37LNm/kPJiZg==} + cpu: [s390x] + os: [linux] + + '@rollup/rollup-linux-x64-gnu@4.57.1': + resolution: {integrity: sha512-ABca4ceT4N+Tv/GtotnWAeXZUZuM/9AQyCyKYyKnpk4yoA7QIAuBt6Hkgpw8kActYlew2mvckXkvx0FfoInnLg==} + cpu: [x64] + os: [linux] + + '@rollup/rollup-linux-x64-musl@4.57.1': + resolution: {integrity: sha512-HFps0JeGtuOR2convgRRkHCekD7j+gdAuXM+/i6kGzQtFhlCtQkpwtNzkNj6QhCDp7DRJ7+qC/1Vg2jt5iSOFw==} + cpu: [x64] + os: [linux] + + 
'@rollup/rollup-openbsd-x64@4.57.1': + resolution: {integrity: sha512-H+hXEv9gdVQuDTgnqD+SQffoWoc0Of59AStSzTEj/feWTBAnSfSD3+Dql1ZruJQxmykT/JVY0dE8Ka7z0DH1hw==} + cpu: [x64] + os: [openbsd] + + '@rollup/rollup-openharmony-arm64@4.57.1': + resolution: {integrity: sha512-4wYoDpNg6o/oPximyc/NG+mYUejZrCU2q+2w6YZqrAs2UcNUChIZXjtafAiiZSUc7On8v5NyNj34Kzj/Ltk6dQ==} + cpu: [arm64] + os: [openharmony] + + '@rollup/rollup-win32-arm64-msvc@4.57.1': + resolution: {integrity: sha512-O54mtsV/6LW3P8qdTcamQmuC990HDfR71lo44oZMZlXU4tzLrbvTii87Ni9opq60ds0YzuAlEr/GNwuNluZyMQ==} + cpu: [arm64] + os: [win32] + + '@rollup/rollup-win32-ia32-msvc@4.57.1': + resolution: {integrity: sha512-P3dLS+IerxCT/7D2q2FYcRdWRl22dNbrbBEtxdWhXrfIMPP9lQhb5h4Du04mdl5Woq05jVCDPCMF7Ub0NAjIew==} + cpu: [ia32] + os: [win32] + + '@rollup/rollup-win32-x64-gnu@4.57.1': + resolution: {integrity: sha512-VMBH2eOOaKGtIJYleXsi2B8CPVADrh+TyNxJ4mWPnKfLB/DBUmzW+5m1xUrcwWoMfSLagIRpjUFeW5CO5hyciQ==} + cpu: [x64] + os: [win32] + + '@rollup/rollup-win32-x64-msvc@4.57.1': + resolution: {integrity: sha512-mxRFDdHIWRxg3UfIIAwCm6NzvxG0jDX/wBN6KsQFTvKFqqg9vTrWUE68qEjHt19A5wwx5X5aUi2zuZT7YR0jrA==} + cpu: [x64] + os: [win32] + + '@shikijs/core@3.22.0': + resolution: {integrity: sha512-iAlTtSDDbJiRpvgL5ugKEATDtHdUVkqgHDm/gbD2ZS9c88mx7G1zSYjjOxp5Qa0eaW0MAQosFRmJSk354PRoQA==} + + '@shikijs/engine-javascript@3.22.0': + resolution: {integrity: sha512-jdKhfgW9CRtj3Tor0L7+yPwdG3CgP7W+ZEqSsojrMzCjD1e0IxIbwUMDDpYlVBlC08TACg4puwFGkZfLS+56Tw==} + + '@shikijs/engine-oniguruma@3.22.0': + resolution: {integrity: sha512-DyXsOG0vGtNtl7ygvabHd7Mt5EY8gCNqR9Y7Lpbbd/PbJvgWrqaKzH1JW6H6qFkuUa8aCxoiYVv8/YfFljiQxA==} + + '@shikijs/langs@3.22.0': + resolution: {integrity: sha512-x/42TfhWmp6H00T6uwVrdTJGKgNdFbrEdhaDwSR5fd5zhQ1Q46bHq9EO61SCEWJR0HY7z2HNDMaBZp8JRmKiIA==} + + '@shikijs/themes@3.22.0': + resolution: {integrity: sha512-o+tlOKqsr6FE4+mYJG08tfCFDS+3CG20HbldXeVoyP+cYSUxDhrFf3GPjE60U55iOkkjbpY2uC3It/eeja35/g==} + + '@shikijs/types@3.22.0': + 
resolution: {integrity: sha512-491iAekgKDBFE67z70Ok5a8KBMsQ2IJwOWw3us/7ffQkIBCyOQfm/aNwVMBUriP02QshIfgHCBSIYAl3u2eWjg==} + + '@shikijs/vscode-textmate@10.0.2': + resolution: {integrity: sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==} + + '@swc/helpers@0.5.18': + resolution: {integrity: sha512-TXTnIcNJQEKwThMMqBXsZ4VGAza6bvN4pa41Rkqoio6QBKMvo+5lexeTMScGCIxtzgQJzElcvIltani+adC5PQ==} + + '@types/debug@4.1.12': + resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} + + '@types/estree-jsx@1.0.5': + resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==} + + '@types/estree@1.0.8': + resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} + + '@types/fontkit@2.0.8': + resolution: {integrity: sha512-wN+8bYxIpJf+5oZdrdtaX04qUuWHcKxcDEgRS9Qm9ZClSHjzEn13SxUC+5eRM+4yXIeTYk8mTzLAWGF64847ew==} + + '@types/hast@3.0.4': + resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==} + + '@types/js-yaml@4.0.9': + resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} + + '@types/mdast@4.0.4': + resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==} + + '@types/mdx@2.0.13': + resolution: {integrity: sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw==} + + '@types/ms@2.1.0': + resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==} + + '@types/nlcst@2.0.3': + resolution: {integrity: sha512-vSYNSDe6Ix3q+6Z7ri9lyWqgGhJTmzRjZRqyq15N0Z/1/UnVsno9G/N40NBijoYx2seFDIl0+B2mgAb9mezUCA==} + + '@types/node@17.0.45': + resolution: {integrity: 
sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw==} + + '@types/node@25.2.3': + resolution: {integrity: sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ==} + + '@types/sax@1.2.7': + resolution: {integrity: sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==} + + '@types/unist@2.0.11': + resolution: {integrity: sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==} + + '@types/unist@3.0.3': + resolution: {integrity: sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==} + + '@ungap/structured-clone@1.3.0': + resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==} + + acorn-jsx@5.3.2: + resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} + peerDependencies: + acorn: ^6.0.0 || ^7.0.0 || ^8.0.0 + + acorn@8.15.0: + resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==} + engines: {node: '>=0.4.0'} + hasBin: true + + ansi-align@3.0.1: + resolution: {integrity: sha512-IOfwwBF5iczOjp/WeY4YxyjqAFMQoZufdQWDd19SEExbVLNXqvpzSJ/M7Za4/sCPmQ0+GRquoA7bGcINcxew6w==} + + ansi-regex@5.0.1: + resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} + engines: {node: '>=8'} + + ansi-regex@6.2.2: + resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} + engines: {node: '>=12'} + + ansi-styles@6.2.3: + resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==} + engines: {node: '>=12'} + + anymatch@3.1.3: + resolution: {integrity: 
sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==} + engines: {node: '>= 8'} + + arg@5.0.2: + resolution: {integrity: sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==} + + argparse@2.0.1: + resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} + + aria-query@5.3.2: + resolution: {integrity: sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==} + engines: {node: '>= 0.4'} + + array-iterate@2.0.1: + resolution: {integrity: sha512-I1jXZMjAgCMmxT4qxXfPXa6SthSoE8h6gkSI9BGGNv8mP8G/v0blc+qFnZu6K42vTOiuME596QaLO0TP3Lk0xg==} + + astring@1.9.0: + resolution: {integrity: sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg==} + hasBin: true + + astro-expressive-code@0.41.6: + resolution: {integrity: sha512-l47tb1uhmVIebHUkw+HEPtU/av0G4O8Q34g2cbkPvC7/e9ZhANcjUUciKt9Hp6gSVDdIuXBBLwJQn2LkeGMOAw==} + peerDependencies: + astro: ^4.0.0-beta || ^5.0.0-beta || ^3.3.0 || ^6.0.0-beta + + astro@5.14.1: + resolution: {integrity: sha512-gPa8NY7/lP8j8g81iy8UwANF3+aukKRWS68IlthZQNgykpg80ne6lbHOp6FErYycxQ1TUhgEfkXVDQZAoJx8Bg==} + engines: {node: 18.20.8 || ^20.3.0 || >=22.0.0, npm: '>=9.6.5', pnpm: '>=7.1.0'} + hasBin: true + + axobject-query@4.1.0: + resolution: {integrity: sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==} + engines: {node: '>= 0.4'} + + bail@2.0.2: + resolution: {integrity: sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==} + + base-64@1.0.0: + resolution: {integrity: sha512-kwDPIFCGx0NZHog36dj+tHiwP4QMzsZ3AgMViUBKI0+V5n4U0ufTCUMhnQ04diaRI8EX/QcPfql7zlhZ7j4zgg==} + + base64-js@1.5.1: + resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + + bcp-47-match@2.0.3: + resolution: 
{integrity: sha512-JtTezzbAibu8G0R9op9zb3vcWZd9JF6M0xOYGPn0fNCd7wOpRB1mU2mH9T8gaBGbAAyIIVgB2G7xG0GP98zMAQ==} + + bcp-47@2.1.0: + resolution: {integrity: sha512-9IIS3UPrvIa1Ej+lVDdDwO7zLehjqsaByECw0bu2RRGP73jALm6FYbzI5gWbgHLvNdkvfXB5YrSbocZdOS0c0w==} + + blob-to-buffer@1.2.9: + resolution: {integrity: sha512-BF033y5fN6OCofD3vgHmNtwZWRcq9NLyyxyILx9hfMy1sXYy4ojFl765hJ2lP0YaN2fuxPaLO2Vzzoxy0FLFFA==} + + boolbase@1.0.0: + resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + + boxen@8.0.1: + resolution: {integrity: sha512-F3PH5k5juxom4xktynS7MoFY+NUWH5LC4CnH11YB8NPew+HLpmBLCybSAEyb2F+4pRXhuhWqFesoQd6DAyc2hw==} + engines: {node: '>=18'} + + brotli@1.3.3: + resolution: {integrity: sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg==} + + camelcase@8.0.0: + resolution: {integrity: sha512-8WB3Jcas3swSvjIeA2yvCJ+Miyz5l1ZmB6HFb9R1317dt9LCQoswg/BGrmAmkWVEszSrrg4RwmO46qIm2OEnSA==} + engines: {node: '>=16'} + + ccount@2.0.1: + resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} + + chalk@5.6.2: + resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} + engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} + + character-entities-html4@2.1.0: + resolution: {integrity: sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==} + + character-entities-legacy@3.0.0: + resolution: {integrity: sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==} + + character-entities@2.0.2: + resolution: {integrity: sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==} + + character-reference-invalid@2.0.1: + resolution: {integrity: sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==} + + 
chokidar@5.0.0: + resolution: {integrity: sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw==} + engines: {node: '>= 20.19.0'} + + ci-info@4.4.0: + resolution: {integrity: sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg==} + engines: {node: '>=8'} + + cli-boxes@3.0.0: + resolution: {integrity: sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g==} + engines: {node: '>=10'} + + clone@2.1.2: + resolution: {integrity: sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w==} + engines: {node: '>=0.8'} + + clsx@2.1.1: + resolution: {integrity: sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==} + engines: {node: '>=6'} + + collapse-white-space@2.1.0: + resolution: {integrity: sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw==} + + color-convert@2.0.1: + resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} + engines: {node: '>=7.0.0'} + + color-name@1.1.4: + resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + + color-string@1.9.1: + resolution: {integrity: sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==} + + color@4.2.3: + resolution: {integrity: sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==} + engines: {node: '>=12.5.0'} + + comma-separated-tokens@2.0.3: + resolution: {integrity: sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==} + + common-ancestor-path@1.0.1: + resolution: {integrity: sha512-L3sHRo1pXXEqX8VU28kfgUY+YGsk09hPqZiZmLacNib6XNTCM8ubYeT7ryXQw8asB1sKgcU5lkB7ONug08aB8w==} + + cookie-es@1.2.2: + resolution: {integrity: 
sha512-+W7VmiVINB+ywl1HGXJXmrqkOhpKrIiVZV6tQuV54ZyQC7MMuBt81Vc336GMLoHBq5hV/F9eXgt5Mnx0Rha5Fg==} + + cookie@1.1.1: + resolution: {integrity: sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==} + engines: {node: '>=18'} + + cross-fetch@3.2.0: + resolution: {integrity: sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==} + + crossws@0.3.5: + resolution: {integrity: sha512-ojKiDvcmByhwa8YYqbQI/hg7MEU0NC03+pSdEq4ZUnZR9xXpwk7E43SMNGkn+JxJGPFtNvQ48+vV2p+P1ml5PA==} + + css-selector-parser@3.3.0: + resolution: {integrity: sha512-Y2asgMGFqJKF4fq4xHDSlFYIkeVfRsm69lQC1q9kbEsH5XtnINTMrweLkjYMeaUgiXBy/uvKeO/a1JHTNnmB2g==} + + css-tree@3.1.0: + resolution: {integrity: sha512-0eW44TGN5SQXU1mWSkKwFstI/22X2bG1nYzZTYMAWjylYURhse752YgbE4Cx46AC+bAvI+/dYTPRk1LqSUnu6w==} + engines: {node: ^10 || ^12.20.0 || ^14.13.0 || >=15.0.0} + + cssesc@3.0.0: + resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==} + engines: {node: '>=4'} + hasBin: true + + debug@4.4.3: + resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} + engines: {node: '>=6.0'} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + decode-named-character-reference@1.3.0: + resolution: {integrity: sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==} + + defu@6.1.4: + resolution: {integrity: sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg==} + + dequal@2.0.3: + resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} + engines: {node: '>=6'} + + destr@2.0.5: + resolution: {integrity: sha512-ugFTXCtDZunbzasqBxrK93Ik/DRYsO6S/fedkWEMKqt04xZ4csmnmwGDBAb07QWNaGMAmnTIemsYZCksjATwsA==} + + detect-libc@2.1.2: + 
resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + + deterministic-object-hash@2.0.2: + resolution: {integrity: sha512-KxektNH63SrbfUyDiwXqRb1rLwKt33AmMv+5Nhsw1kqZ13SJBRTgZHtGbE+hH3a1mVW1cz+4pqSWVPAtLVXTzQ==} + engines: {node: '>=18'} + + devalue@5.6.2: + resolution: {integrity: sha512-nPRkjWzzDQlsejL1WVifk5rvcFi/y1onBRxjaFMjZeR9mFpqu2gmAZ9xUB9/IEanEP/vBtGeGganC/GO1fmufg==} + + devlop@1.1.0: + resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} + + dfa@1.2.0: + resolution: {integrity: sha512-ED3jP8saaweFTjeGX8HQPjeC1YYyZs98jGNZx6IiBvxW7JG5v492kamAQB3m2wop07CvU/RQmzcKr6bgcC5D/Q==} + + diff@5.2.2: + resolution: {integrity: sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==} + engines: {node: '>=0.3.1'} + + direction@2.0.1: + resolution: {integrity: sha512-9S6m9Sukh1cZNknO1CWAr2QAWsbKLafQiyM5gZ7VgXHeuaoUwffKN4q6NC4A/Mf9iiPlOXQEKW/Mv/mh9/3YFA==} + hasBin: true + + dlv@1.1.3: + resolution: {integrity: sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==} + + dset@3.1.4: + resolution: {integrity: sha512-2QF/g9/zTaPDc3BjNcVTGoBbXBgYfMTTceLaYcFJ/W9kggFUkhxD/hMEeuLKbugyef9SqAx8cpgwlIP/jinUTA==} + engines: {node: '>=4'} + + emoji-regex@10.6.0: + resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} + + emoji-regex@8.0.0: + resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + + entities@6.0.1: + resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==} + engines: {node: '>=0.12'} + + es-module-lexer@1.7.0: + resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + + 
esast-util-from-estree@2.0.0: + resolution: {integrity: sha512-4CyanoAudUSBAn5K13H4JhsMH6L9ZP7XbLVe/dKybkxMO7eDyLsT8UHl9TRNrU2Gr9nz+FovfSIjuXWJ81uVwQ==} + + esast-util-from-js@2.0.1: + resolution: {integrity: sha512-8Ja+rNJ0Lt56Pcf3TAmpBZjmx8ZcK5Ts4cAzIOjsjevg9oSXJnl6SUQ2EevU8tv3h6ZLWmoKL5H4fgWvdvfETw==} + + esbuild@0.25.12: + resolution: {integrity: sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==} + engines: {node: '>=18'} + hasBin: true + + escape-string-regexp@5.0.0: + resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==} + engines: {node: '>=12'} + + estree-util-attach-comments@3.0.0: + resolution: {integrity: sha512-cKUwm/HUcTDsYh/9FgnuFqpfquUbwIqwKM26BVCGDPVgvaCl/nDCCjUfiLlx6lsEZ3Z4RFxNbOQ60pkaEwFxGw==} + + estree-util-build-jsx@3.0.1: + resolution: {integrity: sha512-8U5eiL6BTrPxp/CHbs2yMgP8ftMhR5ww1eIKoWRMlqvltHF8fZn5LRDvTKuxD3DUn+shRbLGqXemcP51oFCsGQ==} + + estree-util-is-identifier-name@3.0.0: + resolution: {integrity: sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==} + + estree-util-scope@1.0.0: + resolution: {integrity: sha512-2CAASclonf+JFWBNJPndcOpA8EMJwa0Q8LUFJEKqXLW6+qBvbFZuF5gItbQOs/umBUkjviCSDCbBwU2cXbmrhQ==} + + estree-util-to-js@2.0.0: + resolution: {integrity: sha512-WDF+xj5rRWmD5tj6bIqRi6CkLIXbbNQUcxQHzGysQzvHmdYG2G7p/Tf0J0gpxGgkeMZNTIjT/AoSvC9Xehcgdg==} + + estree-util-visit@2.0.0: + resolution: {integrity: sha512-m5KgiH85xAhhW8Wta0vShLcUvOsh3LLPI2YVwcbio1l7E09NTLL1EyMZFM1OyWowoH0skScNbhOPl4kcBgzTww==} + + estree-walker@2.0.2: + resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==} + + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + + eventemitter3@5.0.4: + resolution: {integrity: 
sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==} + + expressive-code@0.41.6: + resolution: {integrity: sha512-W/5+IQbrpCIM5KGLjO35wlp1NCwDOOVQb+PAvzEoGkW1xjGM807ZGfBKptNWH6UECvt6qgmLyWolCMYKh7eQmA==} + + extend@3.0.2: + resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} + + fast-deep-equal@3.1.3: + resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + + fdir@6.5.0: + resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} + engines: {node: '>=12.0.0'} + peerDependencies: + picomatch: ^3 || ^4 + peerDependenciesMeta: + picomatch: + optional: true + + flattie@1.1.1: + resolution: {integrity: sha512-9UbaD6XdAL97+k/n+N7JwX46K/M6Zc6KcFYskrYL8wbBV/Uyk0CTAMY0VT+qiK5PM7AIc9aTWYtq65U7T+aCNQ==} + engines: {node: '>=8'} + + fontace@0.3.1: + resolution: {integrity: sha512-9f5g4feWT1jWT8+SbL85aLIRLIXUaDygaM2xPXRmzPYxrOMNok79Lr3FGJoKVNKibE0WCunNiEVG2mwuE+2qEg==} + + fontkit@2.0.4: + resolution: {integrity: sha512-syetQadaUEDNdxdugga9CpEYVaQIxOwk7GlwZWWZ19//qW4zE5bknOKeMBDYAASwnpaSHKJITRLMF9m1fp3s6g==} + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + get-east-asian-width@1.4.0: + resolution: {integrity: sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==} + engines: {node: '>=18'} + + github-slugger@2.0.0: + resolution: {integrity: sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==} + + h3@1.15.5: + resolution: {integrity: sha512-xEyq3rSl+dhGX2Lm0+eFQIAzlDN6Fs0EcC4f7BNUmzaRX/PTzeuM+Tr2lHB8FoXggsQIeXLj8EDVgs5ywxyxmg==} + + hast-util-embedded@3.0.0: + resolution: {integrity: 
sha512-naH8sld4Pe2ep03qqULEtvYr7EjrLK2QHY8KJR6RJkTUjPGObe1vnx585uzem2hGra+s1q08DZZpfgDVYRbaXA==} + + hast-util-format@1.1.0: + resolution: {integrity: sha512-yY1UDz6bC9rDvCWHpx12aIBGRG7krurX0p0Fm6pT547LwDIZZiNr8a+IHDogorAdreULSEzP82Nlv5SZkHZcjA==} + + hast-util-from-html@2.0.3: + resolution: {integrity: sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==} + + hast-util-from-parse5@8.0.3: + resolution: {integrity: sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==} + + hast-util-has-property@3.0.0: + resolution: {integrity: sha512-MNilsvEKLFpV604hwfhVStK0usFY/QmM5zX16bo7EjnAEGofr5YyI37kzopBlZJkHD4t887i+q/C8/tr5Q94cA==} + + hast-util-is-body-ok-link@3.0.1: + resolution: {integrity: sha512-0qpnzOBLztXHbHQenVB8uNuxTnm/QBFUOmdOSsEn7GnBtyY07+ENTWVFBAnXd/zEgd9/SUG3lRY7hSIBWRgGpQ==} + + hast-util-is-element@3.0.0: + resolution: {integrity: sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==} + + hast-util-minify-whitespace@1.0.1: + resolution: {integrity: sha512-L96fPOVpnclQE0xzdWb/D12VT5FabA7SnZOUMtL1DbXmYiHJMXZvFkIZfiMmTCNJHUeO2K9UYNXoVyfz+QHuOw==} + + hast-util-parse-selector@4.0.0: + resolution: {integrity: sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==} + + hast-util-phrasing@3.0.1: + resolution: {integrity: sha512-6h60VfI3uBQUxHqTyMymMZnEbNl1XmEGtOxxKYL7stY2o601COo62AWAYBQR9lZbYXYSBoxag8UpPRXK+9fqSQ==} + + hast-util-raw@9.1.0: + resolution: {integrity: sha512-Y8/SBAHkZGoNkpzqqfCldijcuUKh7/su31kEBp67cFY09Wy0mTRgtsLYsiIxMJxlu0f6AA5SUTbDR8K0rxnbUw==} + + hast-util-select@6.0.4: + resolution: {integrity: sha512-RqGS1ZgI0MwxLaKLDxjprynNzINEkRHY2i8ln4DDjgv9ZhcYVIHN9rlpiYsqtFwrgpYU361SyWDQcGNIBVu3lw==} + + hast-util-to-estree@3.1.3: + resolution: {integrity: sha512-48+B/rJWAp0jamNbAAf9M7Uf//UVqAoMmgXhBdxTDJLGKY+LRnZ99qcG+Qjl5HfMpYNzS5v4EAwVEF34LeAj7w==} + + hast-util-to-html@9.0.5: + resolution: 
{integrity: sha512-OguPdidb+fbHQSU4Q4ZiLKnzWo8Wwsf5bZfbvu7//a9oTYoqD/fWpe96NuHkoS9h0ccGOTe0C4NGXdtS0iObOw==} + + hast-util-to-jsx-runtime@2.3.6: + resolution: {integrity: sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==} + + hast-util-to-parse5@8.0.1: + resolution: {integrity: sha512-MlWT6Pjt4CG9lFCjiz4BH7l9wmrMkfkJYCxFwKQic8+RTZgWPuWxwAfjJElsXkex7DJjfSJsQIt931ilUgmwdA==} + + hast-util-to-string@3.0.1: + resolution: {integrity: sha512-XelQVTDWvqcl3axRfI0xSeoVKzyIFPwsAGSLIsKdJKQMXDYJS4WYrBNF/8J7RdhIcFI2BOHgAifggsvsxp/3+A==} + + hast-util-to-text@4.0.2: + resolution: {integrity: sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==} + + hast-util-whitespace@3.0.0: + resolution: {integrity: sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==} + + hastscript@9.0.1: + resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==} + + html-escaper@3.0.3: + resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} + + html-void-elements@3.0.0: + resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} + + html-whitespace-sensitive-tag-names@3.0.1: + resolution: {integrity: sha512-q+310vW8zmymYHALr1da4HyXUQ0zgiIwIicEfotYPWGN0OJVEN/58IJ3A4GBYcEq3LGAZqKb+ugvP0GNB9CEAA==} + + http-cache-semantics@4.2.0: + resolution: {integrity: sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==} + + i18next@23.16.8: + resolution: {integrity: sha512-06r/TitrM88Mg5FdUXAKL96dJMzgqLE5dv3ryBAra4KCwD9mJ4ndOTS95ZuymIGoE+2hzfdaMak2X11/es7ZWg==} + + import-meta-resolve@4.2.0: + resolution: {integrity: sha512-Iqv2fzaTQN28s/FwZAoFq0ZSs/7hMAHJVX+w8PZl3cY19Pxk6jFFalxQoIfW2826i/fDLXv8IiEZRIT0lDuWcg==} + + inline-style-parser@0.2.7: + resolution: 
{integrity: sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==} + + iron-webcrypto@1.2.1: + resolution: {integrity: sha512-feOM6FaSr6rEABp/eDfVseKyTMDt+KGpeB35SkVn9Tyn0CqvVsY3EwI0v5i8nMHyJnzCIQf7nsy3p41TPkJZhg==} + + is-alphabetical@2.0.1: + resolution: {integrity: sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==} + + is-alphanumerical@2.0.1: + resolution: {integrity: sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==} + + is-arrayish@0.3.4: + resolution: {integrity: sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==} + + is-decimal@2.0.1: + resolution: {integrity: sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==} + + is-docker@3.0.0: + resolution: {integrity: sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + hasBin: true + + is-fullwidth-code-point@3.0.0: + resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} + engines: {node: '>=8'} + + is-hexadecimal@2.0.1: + resolution: {integrity: sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==} + + is-inside-container@1.0.0: + resolution: {integrity: sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==} + engines: {node: '>=14.16'} + hasBin: true + + is-plain-obj@4.1.0: + resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==} + engines: {node: '>=12'} + + is-wsl@3.1.0: + resolution: {integrity: sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw==} + engines: {node: '>=16'} + + js-yaml@4.1.1: + resolution: {integrity: 
sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} + hasBin: true + + kleur@3.0.3: + resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==} + engines: {node: '>=6'} + + kleur@4.1.5: + resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==} + engines: {node: '>=6'} + + klona@2.0.6: + resolution: {integrity: sha512-dhG34DXATL5hSxJbIexCft8FChFXtmskoZYnoPWjXQuebWYCNkVeV3KkGegCK9CP1oswI/vQibS2GY7Em/sJJA==} + engines: {node: '>= 8'} + + longest-streak@3.1.0: + resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==} + + lru-cache@11.2.6: + resolution: {integrity: sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==} + engines: {node: 20 || >=22} + + magic-string@0.30.21: + resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + + magicast@0.3.5: + resolution: {integrity: sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ==} + + markdown-extensions@2.0.0: + resolution: {integrity: sha512-o5vL7aDWatOTX8LzaS1WMoaoxIiLRQJuIKKe2wAw6IeULDHaqbiqiggmx+pKvZDb1Sj+pE46Sn1T7lCqfFtg1Q==} + engines: {node: '>=16'} + + markdown-table@3.0.4: + resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==} + + mdast-util-definitions@6.0.0: + resolution: {integrity: sha512-scTllyX6pnYNZH/AIp/0ePz6s4cZtARxImwoPJ7kS42n+MnVsI4XbnG6d4ibehRIldYMWM2LD7ImQblVhUejVQ==} + + mdast-util-directive@3.1.0: + resolution: {integrity: sha512-I3fNFt+DHmpWCYAT7quoM6lHf9wuqtI+oCOfvILnoicNIqjh5E3dEJWiXuYME2gNe8vl1iMQwyUHa7bgFmak6Q==} + + mdast-util-find-and-replace@3.0.2: + resolution: {integrity: 
sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==} + + mdast-util-from-markdown@2.0.2: + resolution: {integrity: sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==} + + mdast-util-gfm-autolink-literal@2.0.1: + resolution: {integrity: sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==} + + mdast-util-gfm-footnote@2.1.0: + resolution: {integrity: sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==} + + mdast-util-gfm-strikethrough@2.0.0: + resolution: {integrity: sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==} + + mdast-util-gfm-table@2.0.0: + resolution: {integrity: sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==} + + mdast-util-gfm-task-list-item@2.0.0: + resolution: {integrity: sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==} + + mdast-util-gfm@3.1.0: + resolution: {integrity: sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==} + + mdast-util-mdx-expression@2.0.1: + resolution: {integrity: sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==} + + mdast-util-mdx-jsx@3.2.0: + resolution: {integrity: sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==} + + mdast-util-mdx@3.0.0: + resolution: {integrity: sha512-JfbYLAW7XnYTTbUsmpu0kdBUVe+yKVJZBItEjwyYJiDJuZ9w4eeaqks4HQO+R7objWgS2ymV60GYpI14Ug554w==} + + mdast-util-mdxjs-esm@2.0.1: + resolution: {integrity: sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==} + + mdast-util-phrasing@4.1.0: + resolution: {integrity: sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==} + + 
mdast-util-to-hast@13.2.1: + resolution: {integrity: sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==} + + mdast-util-to-markdown@2.1.2: + resolution: {integrity: sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==} + + mdast-util-to-string@4.0.0: + resolution: {integrity: sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==} + + mdn-data@2.12.2: + resolution: {integrity: sha512-IEn+pegP1aManZuckezWCO+XZQDplx1366JoVhTpMpBB1sPey/SbveZQUosKiKiGYjg1wH4pMlNgXbCiYgihQA==} + + micromark-core-commonmark@2.0.3: + resolution: {integrity: sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==} + + micromark-extension-directive@3.0.2: + resolution: {integrity: sha512-wjcXHgk+PPdmvR58Le9d7zQYWy+vKEU9Se44p2CrCDPiLr2FMyiT4Fyb5UFKFC66wGB3kPlgD7q3TnoqPS7SZA==} + + micromark-extension-gfm-autolink-literal@2.1.0: + resolution: {integrity: sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==} + + micromark-extension-gfm-footnote@2.1.0: + resolution: {integrity: sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==} + + micromark-extension-gfm-strikethrough@2.1.0: + resolution: {integrity: sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==} + + micromark-extension-gfm-table@2.1.1: + resolution: {integrity: sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==} + + micromark-extension-gfm-tagfilter@2.0.0: + resolution: {integrity: sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==} + + micromark-extension-gfm-task-list-item@2.1.0: + resolution: {integrity: sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==} + + micromark-extension-gfm@3.0.0: + resolution: 
{integrity: sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==} + + micromark-extension-mdx-expression@3.0.1: + resolution: {integrity: sha512-dD/ADLJ1AeMvSAKBwO22zG22N4ybhe7kFIZ3LsDI0GlsNr2A3KYxb0LdC1u5rj4Nw+CHKY0RVdnHX8vj8ejm4Q==} + + micromark-extension-mdx-jsx@3.0.2: + resolution: {integrity: sha512-e5+q1DjMh62LZAJOnDraSSbDMvGJ8x3cbjygy2qFEi7HCeUT4BDKCvMozPozcD6WmOt6sVvYDNBKhFSz3kjOVQ==} + + micromark-extension-mdx-md@2.0.0: + resolution: {integrity: sha512-EpAiszsB3blw4Rpba7xTOUptcFeBFi+6PY8VnJ2hhimH+vCQDirWgsMpz7w1XcZE7LVrSAUGb9VJpG9ghlYvYQ==} + + micromark-extension-mdxjs-esm@3.0.0: + resolution: {integrity: sha512-DJFl4ZqkErRpq/dAPyeWp15tGrcrrJho1hKK5uBS70BCtfrIFg81sqcTVu3Ta+KD1Tk5vAtBNElWxtAa+m8K9A==} + + micromark-extension-mdxjs@3.0.0: + resolution: {integrity: sha512-A873fJfhnJ2siZyUrJ31l34Uqwy4xIFmvPY1oj+Ean5PHcPBYzEsvqvWGaWcfEIr11O5Dlw3p2y0tZWpKHDejQ==} + + micromark-factory-destination@2.0.1: + resolution: {integrity: sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==} + + micromark-factory-label@2.0.1: + resolution: {integrity: sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==} + + micromark-factory-mdx-expression@2.0.3: + resolution: {integrity: sha512-kQnEtA3vzucU2BkrIa8/VaSAsP+EJ3CKOvhMuJgOEGg9KDC6OAY6nSnNDVRiVNRqj7Y4SlSzcStaH/5jge8JdQ==} + + micromark-factory-space@2.0.1: + resolution: {integrity: sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==} + + micromark-factory-title@2.0.1: + resolution: {integrity: sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==} + + micromark-factory-whitespace@2.0.1: + resolution: {integrity: sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==} + + micromark-util-character@2.1.1: + resolution: {integrity: 
sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==} + + micromark-util-chunked@2.0.1: + resolution: {integrity: sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==} + + micromark-util-classify-character@2.0.1: + resolution: {integrity: sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==} + + micromark-util-combine-extensions@2.0.1: + resolution: {integrity: sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==} + + micromark-util-decode-numeric-character-reference@2.0.2: + resolution: {integrity: sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==} + + micromark-util-decode-string@2.0.1: + resolution: {integrity: sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==} + + micromark-util-encode@2.0.1: + resolution: {integrity: sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==} + + micromark-util-events-to-acorn@2.0.3: + resolution: {integrity: sha512-jmsiEIiZ1n7X1Rr5k8wVExBQCg5jy4UXVADItHmNk1zkwEVhBuIUKRu3fqv+hs4nxLISi2DQGlqIOGiFxgbfHg==} + + micromark-util-html-tag-name@2.0.1: + resolution: {integrity: sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==} + + micromark-util-normalize-identifier@2.0.1: + resolution: {integrity: sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==} + + micromark-util-resolve-all@2.0.1: + resolution: {integrity: sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==} + + micromark-util-sanitize-uri@2.0.1: + resolution: {integrity: sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==} + + micromark-util-subtokenize@2.1.0: + resolution: {integrity: 
sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==} + + micromark-util-symbol@2.0.1: + resolution: {integrity: sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==} + + micromark-util-types@2.0.2: + resolution: {integrity: sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==} + + micromark@4.0.2: + resolution: {integrity: sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==} + + mrmime@2.0.1: + resolution: {integrity: sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==} + engines: {node: '>=10'} + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + nanoid@3.3.11: + resolution: {integrity: sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true + + neotraverse@0.6.18: + resolution: {integrity: sha512-Z4SmBUweYa09+o6pG+eASabEpP6QkQ70yHj351pQoEXIs8uHbaU2DWVmzBANKgflPa47A50PtB2+NgRpQvr7vA==} + engines: {node: '>= 10'} + + nlcst-to-string@4.0.0: + resolution: {integrity: sha512-YKLBCcUYKAg0FNlOBT6aI91qFmSiFKiluk655WzPF+DDMA02qIyy8uiRqI8QXtcFpEvll12LpL5MXqEmAZ+dcA==} + + node-fetch-native@1.6.7: + resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==} + + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + + node-mock-http@1.0.4: + resolution: {integrity: sha512-8DY+kFsDkNXy1sJglUfuODx1/opAGJGyrTuFqEoN90oRc2Vk0ZbD4K2qmKXBBEhZQzdKHIVfEJpDU8Ak2NJEvQ==} + + normalize-path@3.0.0: + 
resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} + engines: {node: '>=0.10.0'} + + nth-check@2.1.1: + resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + + ofetch@1.5.1: + resolution: {integrity: sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA==} + + ohash@2.0.11: + resolution: {integrity: sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ==} + + oniguruma-parser@0.12.1: + resolution: {integrity: sha512-8Unqkvk1RYc6yq2WBYRj4hdnsAxVze8i7iPfQr8e4uSP3tRv0rpZcbGUDvxfQQcdwHt/e9PrMvGCsa8OqG9X3w==} + + oniguruma-to-es@4.3.4: + resolution: {integrity: sha512-3VhUGN3w2eYxnTzHn+ikMI+fp/96KoRSVK9/kMTcFqj1NRDh2IhQCKvYxDnWePKRXY/AqH+Fuiyb7VHSzBjHfA==} + + p-limit@6.2.0: + resolution: {integrity: sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA==} + engines: {node: '>=18'} + + p-queue@8.1.1: + resolution: {integrity: sha512-aNZ+VfjobsWryoiPnEApGGmf5WmNsCo9xu8dfaYamG5qaLP7ClhLN6NgsFe6SwJ2UbLEBK5dv9x8Mn5+RVhMWQ==} + engines: {node: '>=18'} + + p-timeout@6.1.4: + resolution: {integrity: sha512-MyIV3ZA/PmyBN/ud8vV9XzwTrNtR4jFrObymZYnZqMmW0zA8Z17vnT0rBgFE/TlohB+YCHqXMgZzb3Csp49vqg==} + engines: {node: '>=14.16'} + + package-manager-detector@1.6.0: + resolution: {integrity: sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA==} + + pagefind@1.4.0: + resolution: {integrity: sha512-z2kY1mQlL4J8q5EIsQkLzQjilovKzfNVhX8De6oyE6uHpfFtyBaqUpcl/XzJC/4fjD8vBDyh1zolimIcVrCn9g==} + hasBin: true + + pako@0.2.9: + resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==} + + parse-entities@4.0.2: + resolution: {integrity: sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==} + + 
parse-latin@7.0.0: + resolution: {integrity: sha512-mhHgobPPua5kZ98EF4HWiH167JWBfl4pvAIXXdbaVohtK7a6YBOy56kvhCqduqyo/f3yrHFWmqmiMg/BkBkYYQ==} + + parse5@7.3.0: + resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==} + + piccolore@0.1.3: + resolution: {integrity: sha512-o8bTeDWjE086iwKrROaDf31K0qC/BENdm15/uH9usSC/uZjJOKb2YGiVHfLY4GhwsERiPI1jmwI2XrA7ACOxVw==} + + picocolors@1.1.1: + resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + + picomatch@2.3.1: + resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==} + engines: {node: '>=8.6'} + + picomatch@4.0.3: + resolution: {integrity: sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==} + engines: {node: '>=12'} + + postcss-nested@6.2.0: + resolution: {integrity: sha512-HQbt28KulC5AJzG+cZtj9kvKB93CFCdLvog1WFLf1D+xmMvPGlBstkpTEZfK5+AN9hfJocyBFCNiqyS48bpgzQ==} + engines: {node: '>=12.0'} + peerDependencies: + postcss: ^8.2.14 + + postcss-selector-parser@6.1.2: + resolution: {integrity: sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==} + engines: {node: '>=4'} + + postcss@8.5.6: + resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} + engines: {node: ^10 || ^12 || >=14} + + prismjs@1.30.0: + resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==} + engines: {node: '>=6'} + + prompts@2.4.2: + resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} + engines: {node: '>= 6'} + + property-information@7.1.0: + resolution: {integrity: sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==} + + 
radix3@1.1.2: + resolution: {integrity: sha512-b484I/7b8rDEdSDKckSSBA8knMpcdsXudlE/LNL639wFoHKwLbEkQFZHWEYwDC0wa0FKUcCY+GAF73Z7wxNVFA==} + + readdirp@5.0.0: + resolution: {integrity: sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ==} + engines: {node: '>= 20.19.0'} + + recma-build-jsx@1.0.0: + resolution: {integrity: sha512-8GtdyqaBcDfva+GUKDr3nev3VpKAhup1+RvkMvUxURHpW7QyIvk9F5wz7Vzo06CEMSilw6uArgRqhpiUcWp8ew==} + + recma-jsx@1.0.1: + resolution: {integrity: sha512-huSIy7VU2Z5OLv6oFLosQGGDqPqdO1iq6bWNAdhzMxSJP7RAso4fCZ1cKu8j9YHCZf3TPrq4dw3okhrylgcd7w==} + peerDependencies: + acorn: ^6.0.0 || ^7.0.0 || ^8.0.0 + + recma-parse@1.0.0: + resolution: {integrity: sha512-OYLsIGBB5Y5wjnSnQW6t3Xg7q3fQ7FWbw/vcXtORTnyaSFscOtABg+7Pnz6YZ6c27fG1/aN8CjfwoUEUIdwqWQ==} + + recma-stringify@1.0.0: + resolution: {integrity: sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g==} + + regex-recursion@6.0.2: + resolution: {integrity: sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg==} + + regex-utilities@2.3.0: + resolution: {integrity: sha512-8VhliFJAWRaUiVvREIiW2NXXTmHs4vMNnSzuJVhscgmGav3g9VDxLrQndI3dZZVVdp0ZO/5v0xmX516/7M9cng==} + + regex@6.1.0: + resolution: {integrity: sha512-6VwtthbV4o/7+OaAF9I5L5V3llLEsoPyq9P1JVXkedTP33c7MfCG0/5NOPcSJn0TzXcG9YUrR0gQSWioew3LDg==} + + rehype-expressive-code@0.41.6: + resolution: {integrity: sha512-aBMX8kxPtjmDSFUdZlAWJkMvsQ4ZMASfee90JWIAV8tweltXLzkWC3q++43ToTelI8ac5iC0B3/S/Cl4Ql1y2g==} + + rehype-format@5.0.1: + resolution: {integrity: sha512-zvmVru9uB0josBVpr946OR8ui7nJEdzZobwLOOqHb/OOD88W0Vk2SqLwoVOj0fM6IPCCO6TaV9CvQvJMWwukFQ==} + + rehype-parse@9.0.1: + resolution: {integrity: sha512-ksCzCD0Fgfh7trPDxr2rSylbwq9iYDkSn8TCDmEJ49ljEUBxDVCzCHv7QNzZOfODanX4+bWQ4WZqLCRWYLfhag==} + + rehype-raw@7.0.0: + resolution: {integrity: sha512-/aE8hCfKlQeA8LmyeyQvQF3eBiLRGNlfBJEvWH7ivp9sBqs7TNqBL5X3v157rM4IFETqDnIOO+z5M/biZbo9Ww==} + + 
rehype-recma@1.0.0: + resolution: {integrity: sha512-lqA4rGUf1JmacCNWWZx0Wv1dHqMwxzsDWYMTowuplHF3xH0N/MmrZ/G3BDZnzAkRmxDadujCjaKM2hqYdCBOGw==} + + rehype-stringify@10.0.1: + resolution: {integrity: sha512-k9ecfXHmIPuFVI61B9DeLPN0qFHfawM6RsuX48hoqlaKSF61RskNjSm1lI8PhBEM0MRdLxVVm4WmTqJQccH9mA==} + + rehype@13.0.2: + resolution: {integrity: sha512-j31mdaRFrwFRUIlxGeuPXXKWQxet52RBQRvCmzl5eCefn/KGbomK5GMHNMsOJf55fgo3qw5tST5neDuarDYR2A==} + + remark-directive@3.0.1: + resolution: {integrity: sha512-gwglrEQEZcZYgVyG1tQuA+h58EZfq5CSULw7J90AFuCTyib1thgHPoqQ+h9iFvU6R+vnZ5oNFQR5QKgGpk741A==} + + remark-gfm@4.0.1: + resolution: {integrity: sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==} + + remark-mdx@3.1.1: + resolution: {integrity: sha512-Pjj2IYlUY3+D8x00UJsIOg5BEvfMyeI+2uLPn9VO9Wg4MEtN/VTIq2NEJQfde9PnX15KgtHyl9S0BcTnWrIuWg==} + + remark-parse@11.0.0: + resolution: {integrity: sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==} + + remark-rehype@11.1.2: + resolution: {integrity: sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==} + + remark-smartypants@3.0.2: + resolution: {integrity: sha512-ILTWeOriIluwEvPjv67v7Blgrcx+LZOkAUVtKI3putuhlZm84FnqDORNXPPm+HY3NdZOMhyDwZ1E+eZB/Df5dA==} + engines: {node: '>=16.0.0'} + + remark-stringify@11.0.0: + resolution: {integrity: sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==} + + restructure@3.0.2: + resolution: {integrity: sha512-gSfoiOEA0VPE6Tukkrr7I0RBdE0s7H1eFCDBk05l1KIQT1UIKNc5JZy6jdyW6eYH3aR3g5b3PuL77rq0hvwtAw==} + + retext-latin@4.0.0: + resolution: {integrity: sha512-hv9woG7Fy0M9IlRQloq/N6atV82NxLGveq+3H2WOi79dtIYWN8OaxogDm77f8YnVXJL2VD3bbqowu5E3EMhBYA==} + + retext-smartypants@6.2.0: + resolution: {integrity: sha512-kk0jOU7+zGv//kfjXEBjdIryL1Acl4i9XNkHxtM7Tm5lFiCog576fjNC9hjoR7LTKQ0DsPWy09JummSsH1uqfQ==} + + retext-stringify@4.0.0: + resolution: 
{integrity: sha512-rtfN/0o8kL1e+78+uxPTqu1Klt0yPzKuQ2BfWwwfgIUSayyzxpM1PJzkKt4V8803uB9qSy32MvI7Xep9khTpiA==} + + retext@9.0.0: + resolution: {integrity: sha512-sbMDcpHCNjvlheSgMfEcVrZko3cDzdbe1x/e7G66dFp0Ff7Mldvi2uv6JkJQzdRcvLYE8CA8Oe8siQx8ZOgTcA==} + + rollup@4.57.1: + resolution: {integrity: sha512-oQL6lgK3e2QZeQ7gcgIkS2YZPg5slw37hYufJ3edKlfQSGGm8ICoxswK15ntSzF/a8+h7ekRy7k7oWc3BQ7y8A==} + engines: {node: '>=18.0.0', npm: '>=8.0.0'} + hasBin: true + + sax@1.4.4: + resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==} + engines: {node: '>=11.0.0'} + + semver@7.7.4: + resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} + engines: {node: '>=10'} + hasBin: true + + sharp@0.34.2: + resolution: {integrity: sha512-lszvBmB9QURERtyKT2bNmsgxXK0ShJrL/fvqlonCo7e6xBF8nT8xU6pW+PMIbLsz0RxQk3rgH9kd8UmvOzlMJg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + + shiki@3.22.0: + resolution: {integrity: sha512-LBnhsoYEe0Eou4e1VgJACes+O6S6QC0w71fCSp5Oya79inkwkm15gQ1UF6VtQ8j/taMDh79hAB49WUk8ALQW3g==} + + simple-swizzle@0.2.4: + resolution: {integrity: sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==} + + sisteransi@1.0.5: + resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==} + + sitemap@8.0.2: + resolution: {integrity: sha512-LwktpJcyZDoa0IL6KT++lQ53pbSrx2c9ge41/SeLTyqy2XUNA6uR4+P9u5IVo5lPeL2arAcOKn1aZAxoYbCKlQ==} + engines: {node: '>=14.0.0', npm: '>=6.0.0'} + hasBin: true + + smol-toml@1.6.0: + resolution: {integrity: sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw==} + engines: {node: '>= 18'} + + source-map-js@1.2.1: + resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} + engines: {node: '>=0.10.0'} + + 
source-map@0.7.6: + resolution: {integrity: sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ==} + engines: {node: '>= 12'} + + space-separated-tokens@2.0.2: + resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} + + starlight-changelogs@0.1.1: + resolution: {integrity: sha512-Upq5VMuzNiVbXHJLVTOpsaqJ14YykCefg5fhT+KdidxfG3Rd4M5kD53G2OIPxqQS0f/x6xWSdD1ORjgnKyLWeA==} + engines: {node: '>=18.17.1'} + peerDependencies: + '@astrojs/starlight': '>=0.35.0' + + starlight-github-alerts@0.1.0: + resolution: {integrity: sha512-mz+btaGen5ByF1BRCMHnqkzSGXsHfvXfiPGGB9BgeczKUvbDR1PAciLYam0gN0JbZYwZSzmhJzgvcHG6gt4f/Q==} + engines: {node: '>=18.17.1'} + peerDependencies: + '@astrojs/starlight': '>=0.35.0' + + starlight-scroll-to-top@0.3.1: + resolution: {integrity: sha512-/RTpJKEjhyn9sE+d5FqQMkJyAzb2xy7e63dGdHPaG9/mFuHKYMriRoTiUGwBYMw3P5xlRJdn5RLkv3ZBOudbvA==} + engines: {node: ^18.17.1 || ^20.3.0 || >=21.0.0} + peerDependencies: + '@astrojs/starlight': '>=0.35' + + stream-replace-string@2.0.0: + resolution: {integrity: sha512-TlnjJ1C0QrmxRNrON00JvaFFlNh5TTG00APw23j74ET7gkQpTASi6/L2fuiav8pzK715HXtUeClpBTw2NPSn6w==} + + string-width@4.2.3: + resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} + engines: {node: '>=8'} + + string-width@7.2.0: + resolution: {integrity: sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==} + engines: {node: '>=18'} + + stringify-entities@4.0.4: + resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} + + strip-ansi@6.0.1: + resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} + engines: {node: '>=8'} + + strip-ansi@7.1.2: + resolution: {integrity: 
sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==} + engines: {node: '>=12'} + + style-to-js@1.1.21: + resolution: {integrity: sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ==} + + style-to-object@1.0.14: + resolution: {integrity: sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==} + + tiny-inflate@1.0.3: + resolution: {integrity: sha512-pkY1fj1cKHb2seWDy0B16HeWyczlJA9/WW3u3c4z/NiWDsO3DOU5D7nhTLE9CF0yXv/QZFY7sEJmj24dK+Rrqw==} + + tinyexec@0.3.2: + resolution: {integrity: sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==} + + tinyglobby@0.2.15: + resolution: {integrity: sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==} + engines: {node: '>=12.0.0'} + + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + + trim-lines@3.0.1: + resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==} + + trough@2.2.0: + resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} + + tsconfck@3.1.6: + resolution: {integrity: sha512-ks6Vjr/jEw0P1gmOVwutM3B7fWxoWBL2KRDb1JfqGVawBmO5UsvmWOQFGHBPl5yxYz4eERr19E6L7NMv+Fej4w==} + engines: {node: ^18 || >=20} + hasBin: true + peerDependencies: + typescript: ^5.0.0 + peerDependenciesMeta: + typescript: + optional: true + + tslib@2.8.1: + resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + + type-fest@4.41.0: + resolution: {integrity: sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==} + engines: {node: '>=16'} + + typescript@5.9.3: + resolution: {integrity: 
sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} + engines: {node: '>=14.17'} + hasBin: true + + ufo@1.6.3: + resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==} + + ultrahtml@1.6.0: + resolution: {integrity: sha512-R9fBn90VTJrqqLDwyMph+HGne8eqY1iPfYhPzZrvKpIfwkWZbcYlfpsb8B9dTvBfpy1/hqAD7Wi8EKfP9e8zdw==} + + uncrypto@0.1.3: + resolution: {integrity: sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==} + + undici-types@7.16.0: + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + + unicode-properties@1.4.1: + resolution: {integrity: sha512-CLjCCLQ6UuMxWnbIylkisbRj31qxHPAurvena/0iwSVbQ2G1VY5/HjV0IRabOEbDHlzZlRdCrD4NhB0JtU40Pg==} + + unicode-trie@2.0.0: + resolution: {integrity: sha512-x7bc76x0bm4prf1VLg79uhAzKw8DVboClSN5VxJuQ+LKDOVEW9CdH+VY7SP+vX7xCYQqzzgQpFqz15zeLvAtZQ==} + + unified@11.0.5: + resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==} + + unifont@0.5.2: + resolution: {integrity: sha512-LzR4WUqzH9ILFvjLAUU7dK3Lnou/qd5kD+IakBtBK4S15/+x2y9VX+DcWQv6s551R6W+vzwgVS6tFg3XggGBgg==} + + unist-util-find-after@5.0.0: + resolution: {integrity: sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==} + + unist-util-is@6.0.1: + resolution: {integrity: sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==} + + unist-util-modify-children@4.0.0: + resolution: {integrity: sha512-+tdN5fGNddvsQdIzUF3Xx82CU9sMM+fA0dLgR9vOmT0oPT2jH+P1nd5lSqfCfXAw+93NhcXNY2qqvTUtE4cQkw==} + + unist-util-position-from-estree@2.0.0: + resolution: {integrity: sha512-KaFVRjoqLyF6YXCbVLNad/eS4+OfPQQn2yOd7zF/h5T/CSL2v8NpN6a5TPvtbXthAGw5nG+PuTtq+DdIZr+cRQ==} + + unist-util-position@5.0.0: + resolution: {integrity: 
sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==} + + unist-util-remove-position@5.0.0: + resolution: {integrity: sha512-Hp5Kh3wLxv0PHj9m2yZhhLt58KzPtEYKQQ4yxfYFEO7EvHwzyDYnduhHnY1mDxoqr7VUwVuHXk9RXKIiYS1N8Q==} + + unist-util-stringify-position@4.0.0: + resolution: {integrity: sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==} + + unist-util-visit-children@3.0.0: + resolution: {integrity: sha512-RgmdTfSBOg04sdPcpTSD1jzoNBjt9a80/ZCzp5cI9n1qPzLZWF9YdvWGN2zmTumP1HWhXKdUWexjy/Wy/lJ7tA==} + + unist-util-visit-parents@6.0.2: + resolution: {integrity: sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ==} + + unist-util-visit@5.1.0: + resolution: {integrity: sha512-m+vIdyeCOpdr/QeQCu2EzxX/ohgS8KbnPDgFni4dQsfSCtpz8UqDyY5GjRru8PDKuYn7Fq19j1CQ+nJSsGKOzg==} + + unstorage@1.17.4: + resolution: {integrity: sha512-fHK0yNg38tBiJKp/Vgsq4j0JEsCmgqH58HAn707S7zGkArbZsVr/CwINoi+nh3h98BRCwKvx1K3Xg9u3VV83sw==} + peerDependencies: + '@azure/app-configuration': ^1.8.0 + '@azure/cosmos': ^4.2.0 + '@azure/data-tables': ^13.3.0 + '@azure/identity': ^4.6.0 + '@azure/keyvault-secrets': ^4.9.0 + '@azure/storage-blob': ^12.26.0 + '@capacitor/preferences': ^6 || ^7 || ^8 + '@deno/kv': '>=0.9.0' + '@netlify/blobs': ^6.5.0 || ^7.0.0 || ^8.1.0 || ^9.0.0 || ^10.0.0 + '@planetscale/database': ^1.19.0 + '@upstash/redis': ^1.34.3 + '@vercel/blob': '>=0.27.1' + '@vercel/functions': ^2.2.12 || ^3.0.0 + '@vercel/kv': ^1 || ^2 || ^3 + aws4fetch: ^1.0.20 + db0: '>=0.2.1' + idb-keyval: ^6.2.1 + ioredis: ^5.4.2 + uploadthing: ^7.4.4 + peerDependenciesMeta: + '@azure/app-configuration': + optional: true + '@azure/cosmos': + optional: true + '@azure/data-tables': + optional: true + '@azure/identity': + optional: true + '@azure/keyvault-secrets': + optional: true + '@azure/storage-blob': + optional: true + '@capacitor/preferences': + optional: true + '@deno/kv': + optional: true + 
'@netlify/blobs': + optional: true + '@planetscale/database': + optional: true + '@upstash/redis': + optional: true + '@vercel/blob': + optional: true + '@vercel/functions': + optional: true + '@vercel/kv': + optional: true + aws4fetch: + optional: true + db0: + optional: true + idb-keyval: + optional: true + ioredis: + optional: true + uploadthing: + optional: true + + util-deprecate@1.0.2: + resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + + vfile-location@5.0.3: + resolution: {integrity: sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==} + + vfile-message@4.0.3: + resolution: {integrity: sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==} + + vfile@6.0.3: + resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} + + vite@6.4.1: + resolution: {integrity: sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + peerDependencies: + '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0 + jiti: '>=1.21.0' + less: '*' + lightningcss: ^1.21.0 + sass: '*' + sass-embedded: '*' + stylus: '*' + sugarss: '*' + terser: ^5.16.0 + tsx: ^4.8.1 + yaml: ^2.4.2 + peerDependenciesMeta: + '@types/node': + optional: true + jiti: + optional: true + less: + optional: true + lightningcss: + optional: true + sass: + optional: true + sass-embedded: + optional: true + stylus: + optional: true + sugarss: + optional: true + terser: + optional: true + tsx: + optional: true + yaml: + optional: true + + vitefu@1.1.1: + resolution: {integrity: sha512-B/Fegf3i8zh0yFbpzZ21amWzHmuNlLlmJT6n7bu5e+pCHUKQIfXSYokrqOBGEMMe9UG2sostKQF9mml/vYaWJQ==} + peerDependencies: + vite: ^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0-beta.0 + peerDependenciesMeta: + vite: + optional: true + + 
web-namespaces@2.0.1: + resolution: {integrity: sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==} + + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + + which-pm-runs@1.1.0: + resolution: {integrity: sha512-n1brCuqClxfFfq/Rb0ICg9giSZqCS+pLtccdag6C2HyufBrh3fBOiy9nb6ggRMvWOVH5GrdJskj5iGTZNxd7SA==} + engines: {node: '>=4'} + + widest-line@5.0.0: + resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==} + engines: {node: '>=18'} + + wrap-ansi@9.0.2: + resolution: {integrity: sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==} + engines: {node: '>=18'} + + xxhash-wasm@1.1.0: + resolution: {integrity: sha512-147y/6YNh+tlp6nd/2pWq38i9h6mz/EuQ6njIrmW8D1BS5nCqs0P6DG+m6zTGnNz5I+uhZ0SHxBs9BsPrwcKDA==} + + yargs-parser@21.1.1: + resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} + engines: {node: '>=12'} + + yocto-queue@1.2.2: + resolution: {integrity: sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==} + engines: {node: '>=12.20'} + + yocto-spinner@0.2.3: + resolution: {integrity: sha512-sqBChb33loEnkoXte1bLg45bEBsOP9N1kzQh5JZNKj/0rik4zAPTNSAVPj3uQAdc6slYJ0Ksc403G2XgxsJQFQ==} + engines: {node: '>=18.19'} + + yoctocolors@2.1.2: + resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==} + engines: {node: '>=18'} + + zod-to-json-schema@3.25.1: + resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==} + peerDependencies: + zod: ^3.25 || ^4 + + 
zod-to-ts@1.2.0: + resolution: {integrity: sha512-x30XE43V+InwGpvTySRNz9kB7qFU8DlyEy7BsSTCHPH1R0QasMmHWZDCzYm6bVXtj/9NNJAZF3jW8rzFvH5OFA==} + peerDependencies: + typescript: ^4.9.4 || ^5.0.2 + zod: ^3 + + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + + zwitch@2.0.4: + resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==} + +snapshots: + + '@ascorbic/loader-utils@1.0.2(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))': + dependencies: + astro: 5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3) + + '@astrojs/compiler@2.13.1': {} + + '@astrojs/internal-helpers@0.7.3': {} + + '@astrojs/internal-helpers@0.7.5': {} + + '@astrojs/markdown-remark@6.3.10': + dependencies: + '@astrojs/internal-helpers': 0.7.5 + '@astrojs/prism': 3.3.0 + github-slugger: 2.0.0 + hast-util-from-html: 2.0.3 + hast-util-to-text: 4.0.2 + import-meta-resolve: 4.2.0 + js-yaml: 4.1.1 + mdast-util-definitions: 6.0.0 + rehype-raw: 7.0.0 + rehype-stringify: 10.0.1 + remark-gfm: 4.0.1 + remark-parse: 11.0.0 + remark-rehype: 11.1.2 + remark-smartypants: 3.0.2 + shiki: 3.22.0 + smol-toml: 1.6.0 + unified: 11.0.5 + unist-util-remove-position: 5.0.0 + unist-util-visit: 5.1.0 + unist-util-visit-parents: 6.0.2 + vfile: 6.0.3 + transitivePeerDependencies: + - supports-color + + '@astrojs/markdown-remark@6.3.7': + dependencies: + '@astrojs/internal-helpers': 0.7.3 + '@astrojs/prism': 3.3.0 + github-slugger: 2.0.0 + hast-util-from-html: 2.0.3 + hast-util-to-text: 4.0.2 + import-meta-resolve: 4.2.0 + js-yaml: 4.1.1 + mdast-util-definitions: 6.0.0 + rehype-raw: 7.0.0 + rehype-stringify: 10.0.1 + remark-gfm: 4.0.1 + remark-parse: 11.0.0 + remark-rehype: 11.1.2 + remark-smartypants: 3.0.2 + shiki: 3.22.0 + smol-toml: 1.6.0 + unified: 11.0.5 + unist-util-remove-position: 5.0.0 + unist-util-visit: 5.1.0 + unist-util-visit-parents: 6.0.2 
+ vfile: 6.0.3 + transitivePeerDependencies: + - supports-color + + '@astrojs/mdx@4.3.13(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))': + dependencies: + '@astrojs/markdown-remark': 6.3.10 + '@mdx-js/mdx': 3.1.1 + acorn: 8.15.0 + astro: 5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3) + es-module-lexer: 1.7.0 + estree-util-visit: 2.0.0 + hast-util-to-html: 9.0.5 + piccolore: 0.1.3 + rehype-raw: 7.0.0 + remark-gfm: 4.0.1 + remark-smartypants: 3.0.2 + source-map: 0.7.6 + unist-util-visit: 5.1.0 + vfile: 6.0.3 + transitivePeerDependencies: + - supports-color + + '@astrojs/prism@3.3.0': + dependencies: + prismjs: 1.30.0 + + '@astrojs/sitemap@3.7.0': + dependencies: + sitemap: 8.0.2 + stream-replace-string: 2.0.0 + zod: 3.25.76 + + '@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))': + dependencies: + '@astrojs/markdown-remark': 6.3.10 + '@astrojs/mdx': 4.3.13(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + '@astrojs/sitemap': 3.7.0 + '@pagefind/default-ui': 1.4.0 + '@types/hast': 3.0.4 + '@types/js-yaml': 4.0.9 + '@types/mdast': 4.0.4 + astro: 5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3) + astro-expressive-code: 0.41.6(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + bcp-47: 2.1.0 + hast-util-from-html: 2.0.3 + hast-util-select: 6.0.4 + hast-util-to-string: 3.0.1 + hastscript: 9.0.1 + i18next: 23.16.8 + js-yaml: 4.1.1 + klona: 2.0.6 + mdast-util-directive: 3.1.0 + mdast-util-to-markdown: 2.1.2 + mdast-util-to-string: 4.0.0 + pagefind: 1.4.0 + rehype: 13.0.2 + rehype-format: 5.0.1 + remark-directive: 3.0.1 + ultrahtml: 1.6.0 + unified: 11.0.5 + unist-util-visit: 5.1.0 + vfile: 6.0.3 + transitivePeerDependencies: + - supports-color + + '@astrojs/telemetry@3.3.0': + dependencies: + ci-info: 4.4.0 + debug: 4.4.3 + dlv: 1.1.3 + dset: 3.1.4 + is-docker: 3.0.0 + is-wsl: 3.1.0 + which-pm-runs: 1.1.0 + transitivePeerDependencies: + - supports-color 
+ + '@babel/helper-string-parser@7.27.1': {} + + '@babel/helper-validator-identifier@7.28.5': {} + + '@babel/parser@7.29.0': + dependencies: + '@babel/types': 7.29.0 + + '@babel/runtime@7.28.6': {} + + '@babel/types@7.29.0': + dependencies: + '@babel/helper-string-parser': 7.27.1 + '@babel/helper-validator-identifier': 7.28.5 + + '@capsizecss/unpack@2.4.0': + dependencies: + blob-to-buffer: 1.2.9 + cross-fetch: 3.2.0 + fontkit: 2.0.4 + transitivePeerDependencies: + - encoding + + '@ctrl/tinycolor@4.2.0': {} + + '@emnapi/runtime@1.8.1': + dependencies: + tslib: 2.8.1 + optional: true + + '@esbuild/aix-ppc64@0.25.12': + optional: true + + '@esbuild/android-arm64@0.25.12': + optional: true + + '@esbuild/android-arm@0.25.12': + optional: true + + '@esbuild/android-x64@0.25.12': + optional: true + + '@esbuild/darwin-arm64@0.25.12': + optional: true + + '@esbuild/darwin-x64@0.25.12': + optional: true + + '@esbuild/freebsd-arm64@0.25.12': + optional: true + + '@esbuild/freebsd-x64@0.25.12': + optional: true + + '@esbuild/linux-arm64@0.25.12': + optional: true + + '@esbuild/linux-arm@0.25.12': + optional: true + + '@esbuild/linux-ia32@0.25.12': + optional: true + + '@esbuild/linux-loong64@0.25.12': + optional: true + + '@esbuild/linux-mips64el@0.25.12': + optional: true + + '@esbuild/linux-ppc64@0.25.12': + optional: true + + '@esbuild/linux-riscv64@0.25.12': + optional: true + + '@esbuild/linux-s390x@0.25.12': + optional: true + + '@esbuild/linux-x64@0.25.12': + optional: true + + '@esbuild/netbsd-arm64@0.25.12': + optional: true + + '@esbuild/netbsd-x64@0.25.12': + optional: true + + '@esbuild/openbsd-arm64@0.25.12': + optional: true + + '@esbuild/openbsd-x64@0.25.12': + optional: true + + '@esbuild/openharmony-arm64@0.25.12': + optional: true + + '@esbuild/sunos-x64@0.25.12': + optional: true + + '@esbuild/win32-arm64@0.25.12': + optional: true + + '@esbuild/win32-ia32@0.25.12': + optional: true + + '@esbuild/win32-x64@0.25.12': + optional: true + + 
'@expressive-code/core@0.41.6': + dependencies: + '@ctrl/tinycolor': 4.2.0 + hast-util-select: 6.0.4 + hast-util-to-html: 9.0.5 + hast-util-to-text: 4.0.2 + hastscript: 9.0.1 + postcss: 8.5.6 + postcss-nested: 6.2.0(postcss@8.5.6) + unist-util-visit: 5.1.0 + unist-util-visit-parents: 6.0.2 + + '@expressive-code/plugin-frames@0.41.6': + dependencies: + '@expressive-code/core': 0.41.6 + + '@expressive-code/plugin-shiki@0.41.6': + dependencies: + '@expressive-code/core': 0.41.6 + shiki: 3.22.0 + + '@expressive-code/plugin-text-markers@0.41.6': + dependencies: + '@expressive-code/core': 0.41.6 + + '@img/sharp-darwin-arm64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.1.0 + optional: true + + '@img/sharp-darwin-x64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.1.0 + optional: true + + '@img/sharp-libvips-darwin-arm64@1.1.0': + optional: true + + '@img/sharp-libvips-darwin-x64@1.1.0': + optional: true + + '@img/sharp-libvips-linux-arm64@1.1.0': + optional: true + + '@img/sharp-libvips-linux-arm@1.1.0': + optional: true + + '@img/sharp-libvips-linux-ppc64@1.1.0': + optional: true + + '@img/sharp-libvips-linux-s390x@1.1.0': + optional: true + + '@img/sharp-libvips-linux-x64@1.1.0': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.1.0': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.1.0': + optional: true + + '@img/sharp-linux-arm64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.1.0 + optional: true + + '@img/sharp-linux-arm@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.1.0 + optional: true + + '@img/sharp-linux-s390x@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linux-s390x': 1.1.0 + optional: true + + '@img/sharp-linux-x64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.1.0 + optional: true + + '@img/sharp-linuxmusl-arm64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.1.0 + optional: true + + 
'@img/sharp-linuxmusl-x64@0.34.2': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.1.0 + optional: true + + '@img/sharp-wasm32@0.34.2': + dependencies: + '@emnapi/runtime': 1.8.1 + optional: true + + '@img/sharp-win32-arm64@0.34.2': + optional: true + + '@img/sharp-win32-ia32@0.34.2': + optional: true + + '@img/sharp-win32-x64@0.34.2': + optional: true + + '@jridgewell/sourcemap-codec@1.5.5': {} + + '@mdx-js/mdx@3.1.1': + dependencies: + '@types/estree': 1.0.8 + '@types/estree-jsx': 1.0.5 + '@types/hast': 3.0.4 + '@types/mdx': 2.0.13 + acorn: 8.15.0 + collapse-white-space: 2.1.0 + devlop: 1.1.0 + estree-util-is-identifier-name: 3.0.0 + estree-util-scope: 1.0.0 + estree-walker: 3.0.3 + hast-util-to-jsx-runtime: 2.3.6 + markdown-extensions: 2.0.0 + recma-build-jsx: 1.0.0 + recma-jsx: 1.0.1(acorn@8.15.0) + recma-stringify: 1.0.0 + rehype-recma: 1.0.0 + remark-mdx: 3.1.1 + remark-parse: 11.0.0 + remark-rehype: 11.1.2 + source-map: 0.7.6 + unified: 11.0.5 + unist-util-position-from-estree: 2.0.0 + unist-util-stringify-position: 4.0.0 + unist-util-visit: 5.1.0 + vfile: 6.0.3 + transitivePeerDependencies: + - supports-color + + '@oslojs/encoding@1.1.0': {} + + '@pagefind/darwin-arm64@1.4.0': + optional: true + + '@pagefind/darwin-x64@1.4.0': + optional: true + + '@pagefind/default-ui@1.4.0': {} + + '@pagefind/freebsd-x64@1.4.0': + optional: true + + '@pagefind/linux-arm64@1.4.0': + optional: true + + '@pagefind/linux-x64@1.4.0': + optional: true + + '@pagefind/windows-x64@1.4.0': + optional: true + + '@rollup/pluginutils@5.3.0(rollup@4.57.1)': + dependencies: + '@types/estree': 1.0.8 + estree-walker: 2.0.2 + picomatch: 4.0.3 + optionalDependencies: + rollup: 4.57.1 + + '@rollup/rollup-android-arm-eabi@4.57.1': + optional: true + + '@rollup/rollup-android-arm64@4.57.1': + optional: true + + '@rollup/rollup-darwin-arm64@4.57.1': + optional: true + + '@rollup/rollup-darwin-x64@4.57.1': + optional: true + + '@rollup/rollup-freebsd-arm64@4.57.1': + optional: 
true + + '@rollup/rollup-freebsd-x64@4.57.1': + optional: true + + '@rollup/rollup-linux-arm-gnueabihf@4.57.1': + optional: true + + '@rollup/rollup-linux-arm-musleabihf@4.57.1': + optional: true + + '@rollup/rollup-linux-arm64-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-arm64-musl@4.57.1': + optional: true + + '@rollup/rollup-linux-loong64-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-loong64-musl@4.57.1': + optional: true + + '@rollup/rollup-linux-ppc64-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-ppc64-musl@4.57.1': + optional: true + + '@rollup/rollup-linux-riscv64-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-riscv64-musl@4.57.1': + optional: true + + '@rollup/rollup-linux-s390x-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-x64-gnu@4.57.1': + optional: true + + '@rollup/rollup-linux-x64-musl@4.57.1': + optional: true + + '@rollup/rollup-openbsd-x64@4.57.1': + optional: true + + '@rollup/rollup-openharmony-arm64@4.57.1': + optional: true + + '@rollup/rollup-win32-arm64-msvc@4.57.1': + optional: true + + '@rollup/rollup-win32-ia32-msvc@4.57.1': + optional: true + + '@rollup/rollup-win32-x64-gnu@4.57.1': + optional: true + + '@rollup/rollup-win32-x64-msvc@4.57.1': + optional: true + + '@shikijs/core@3.22.0': + dependencies: + '@shikijs/types': 3.22.0 + '@shikijs/vscode-textmate': 10.0.2 + '@types/hast': 3.0.4 + hast-util-to-html: 9.0.5 + + '@shikijs/engine-javascript@3.22.0': + dependencies: + '@shikijs/types': 3.22.0 + '@shikijs/vscode-textmate': 10.0.2 + oniguruma-to-es: 4.3.4 + + '@shikijs/engine-oniguruma@3.22.0': + dependencies: + '@shikijs/types': 3.22.0 + '@shikijs/vscode-textmate': 10.0.2 + + '@shikijs/langs@3.22.0': + dependencies: + '@shikijs/types': 3.22.0 + + '@shikijs/themes@3.22.0': + dependencies: + '@shikijs/types': 3.22.0 + + '@shikijs/types@3.22.0': + dependencies: + '@shikijs/vscode-textmate': 10.0.2 + '@types/hast': 3.0.4 + + '@shikijs/vscode-textmate@10.0.2': {} + + '@swc/helpers@0.5.18': + 
dependencies: + tslib: 2.8.1 + + '@types/debug@4.1.12': + dependencies: + '@types/ms': 2.1.0 + + '@types/estree-jsx@1.0.5': + dependencies: + '@types/estree': 1.0.8 + + '@types/estree@1.0.8': {} + + '@types/fontkit@2.0.8': + dependencies: + '@types/node': 25.2.3 + + '@types/hast@3.0.4': + dependencies: + '@types/unist': 3.0.3 + + '@types/js-yaml@4.0.9': {} + + '@types/mdast@4.0.4': + dependencies: + '@types/unist': 3.0.3 + + '@types/mdx@2.0.13': {} + + '@types/ms@2.1.0': {} + + '@types/nlcst@2.0.3': + dependencies: + '@types/unist': 3.0.3 + + '@types/node@17.0.45': {} + + '@types/node@25.2.3': + dependencies: + undici-types: 7.16.0 + + '@types/sax@1.2.7': + dependencies: + '@types/node': 17.0.45 + + '@types/unist@2.0.11': {} + + '@types/unist@3.0.3': {} + + '@ungap/structured-clone@1.3.0': {} + + acorn-jsx@5.3.2(acorn@8.15.0): + dependencies: + acorn: 8.15.0 + + acorn@8.15.0: {} + + ansi-align@3.0.1: + dependencies: + string-width: 4.2.3 + + ansi-regex@5.0.1: {} + + ansi-regex@6.2.2: {} + + ansi-styles@6.2.3: {} + + anymatch@3.1.3: + dependencies: + normalize-path: 3.0.0 + picomatch: 2.3.1 + + arg@5.0.2: {} + + argparse@2.0.1: {} + + aria-query@5.3.2: {} + + array-iterate@2.0.1: {} + + astring@1.9.0: {} + + astro-expressive-code@0.41.6(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)): + dependencies: + astro: 5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3) + rehype-expressive-code: 0.41.6 + + astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3): + dependencies: + '@astrojs/compiler': 2.13.1 + '@astrojs/internal-helpers': 0.7.3 + '@astrojs/markdown-remark': 6.3.7 + '@astrojs/telemetry': 3.3.0 + '@capsizecss/unpack': 2.4.0 + '@oslojs/encoding': 1.1.0 + '@rollup/pluginutils': 5.3.0(rollup@4.57.1) + acorn: 8.15.0 + aria-query: 5.3.2 + axobject-query: 4.1.0 + boxen: 8.0.1 + ci-info: 4.4.0 + clsx: 2.1.1 + common-ancestor-path: 1.0.1 + cookie: 1.1.1 + cssesc: 3.0.0 + debug: 4.4.3 + deterministic-object-hash: 2.0.2 + devalue: 
5.6.2 + diff: 5.2.2 + dlv: 1.1.3 + dset: 3.1.4 + es-module-lexer: 1.7.0 + esbuild: 0.25.12 + estree-walker: 3.0.3 + flattie: 1.1.1 + fontace: 0.3.1 + github-slugger: 2.0.0 + html-escaper: 3.0.3 + http-cache-semantics: 4.2.0 + import-meta-resolve: 4.2.0 + js-yaml: 4.1.1 + kleur: 4.1.5 + magic-string: 0.30.21 + magicast: 0.3.5 + mrmime: 2.0.1 + neotraverse: 0.6.18 + p-limit: 6.2.0 + p-queue: 8.1.1 + package-manager-detector: 1.6.0 + picomatch: 4.0.3 + prompts: 2.4.2 + rehype: 13.0.2 + semver: 7.7.4 + shiki: 3.22.0 + smol-toml: 1.6.0 + tinyexec: 0.3.2 + tinyglobby: 0.2.15 + tsconfck: 3.1.6(typescript@5.9.3) + ultrahtml: 1.6.0 + unifont: 0.5.2 + unist-util-visit: 5.1.0 + unstorage: 1.17.4 + vfile: 6.0.3 + vite: 6.4.1(@types/node@25.2.3) + vitefu: 1.1.1(vite@6.4.1(@types/node@25.2.3)) + xxhash-wasm: 1.1.0 + yargs-parser: 21.1.1 + yocto-spinner: 0.2.3 + zod: 3.25.76 + zod-to-json-schema: 3.25.1(zod@3.25.76) + zod-to-ts: 1.2.0(typescript@5.9.3)(zod@3.25.76) + optionalDependencies: + sharp: 0.34.2 + transitivePeerDependencies: + - '@azure/app-configuration' + - '@azure/cosmos' + - '@azure/data-tables' + - '@azure/identity' + - '@azure/keyvault-secrets' + - '@azure/storage-blob' + - '@capacitor/preferences' + - '@deno/kv' + - '@netlify/blobs' + - '@planetscale/database' + - '@types/node' + - '@upstash/redis' + - '@vercel/blob' + - '@vercel/functions' + - '@vercel/kv' + - aws4fetch + - db0 + - encoding + - idb-keyval + - ioredis + - jiti + - less + - lightningcss + - rollup + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - typescript + - uploadthing + - yaml + + axobject-query@4.1.0: {} + + bail@2.0.2: {} + + base-64@1.0.0: {} + + base64-js@1.5.1: {} + + bcp-47-match@2.0.3: {} + + bcp-47@2.1.0: + dependencies: + is-alphabetical: 2.0.1 + is-alphanumerical: 2.0.1 + is-decimal: 2.0.1 + + blob-to-buffer@1.2.9: {} + + boolbase@1.0.0: {} + + boxen@8.0.1: + dependencies: + ansi-align: 3.0.1 + camelcase: 8.0.0 + chalk: 5.6.2 + cli-boxes: 
3.0.0 + string-width: 7.2.0 + type-fest: 4.41.0 + widest-line: 5.0.0 + wrap-ansi: 9.0.2 + + brotli@1.3.3: + dependencies: + base64-js: 1.5.1 + + camelcase@8.0.0: {} + + ccount@2.0.1: {} + + chalk@5.6.2: {} + + character-entities-html4@2.1.0: {} + + character-entities-legacy@3.0.0: {} + + character-entities@2.0.2: {} + + character-reference-invalid@2.0.1: {} + + chokidar@5.0.0: + dependencies: + readdirp: 5.0.0 + + ci-info@4.4.0: {} + + cli-boxes@3.0.0: {} + + clone@2.1.2: {} + + clsx@2.1.1: {} + + collapse-white-space@2.1.0: {} + + color-convert@2.0.1: + dependencies: + color-name: 1.1.4 + + color-name@1.1.4: {} + + color-string@1.9.1: + dependencies: + color-name: 1.1.4 + simple-swizzle: 0.2.4 + + color@4.2.3: + dependencies: + color-convert: 2.0.1 + color-string: 1.9.1 + + comma-separated-tokens@2.0.3: {} + + common-ancestor-path@1.0.1: {} + + cookie-es@1.2.2: {} + + cookie@1.1.1: {} + + cross-fetch@3.2.0: + dependencies: + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + + crossws@0.3.5: + dependencies: + uncrypto: 0.1.3 + + css-selector-parser@3.3.0: {} + + css-tree@3.1.0: + dependencies: + mdn-data: 2.12.2 + source-map-js: 1.2.1 + + cssesc@3.0.0: {} + + debug@4.4.3: + dependencies: + ms: 2.1.3 + + decode-named-character-reference@1.3.0: + dependencies: + character-entities: 2.0.2 + + defu@6.1.4: {} + + dequal@2.0.3: {} + + destr@2.0.5: {} + + detect-libc@2.1.2: {} + + deterministic-object-hash@2.0.2: + dependencies: + base-64: 1.0.0 + + devalue@5.6.2: {} + + devlop@1.1.0: + dependencies: + dequal: 2.0.3 + + dfa@1.2.0: {} + + diff@5.2.2: {} + + direction@2.0.1: {} + + dlv@1.1.3: {} + + dset@3.1.4: {} + + emoji-regex@10.6.0: {} + + emoji-regex@8.0.0: {} + + entities@6.0.1: {} + + es-module-lexer@1.7.0: {} + + esast-util-from-estree@2.0.0: + dependencies: + '@types/estree-jsx': 1.0.5 + devlop: 1.1.0 + estree-util-visit: 2.0.0 + unist-util-position-from-estree: 2.0.0 + + esast-util-from-js@2.0.1: + dependencies: + '@types/estree-jsx': 1.0.5 + acorn: 
8.15.0 + esast-util-from-estree: 2.0.0 + vfile-message: 4.0.3 + + esbuild@0.25.12: + optionalDependencies: + '@esbuild/aix-ppc64': 0.25.12 + '@esbuild/android-arm': 0.25.12 + '@esbuild/android-arm64': 0.25.12 + '@esbuild/android-x64': 0.25.12 + '@esbuild/darwin-arm64': 0.25.12 + '@esbuild/darwin-x64': 0.25.12 + '@esbuild/freebsd-arm64': 0.25.12 + '@esbuild/freebsd-x64': 0.25.12 + '@esbuild/linux-arm': 0.25.12 + '@esbuild/linux-arm64': 0.25.12 + '@esbuild/linux-ia32': 0.25.12 + '@esbuild/linux-loong64': 0.25.12 + '@esbuild/linux-mips64el': 0.25.12 + '@esbuild/linux-ppc64': 0.25.12 + '@esbuild/linux-riscv64': 0.25.12 + '@esbuild/linux-s390x': 0.25.12 + '@esbuild/linux-x64': 0.25.12 + '@esbuild/netbsd-arm64': 0.25.12 + '@esbuild/netbsd-x64': 0.25.12 + '@esbuild/openbsd-arm64': 0.25.12 + '@esbuild/openbsd-x64': 0.25.12 + '@esbuild/openharmony-arm64': 0.25.12 + '@esbuild/sunos-x64': 0.25.12 + '@esbuild/win32-arm64': 0.25.12 + '@esbuild/win32-ia32': 0.25.12 + '@esbuild/win32-x64': 0.25.12 + + escape-string-regexp@5.0.0: {} + + estree-util-attach-comments@3.0.0: + dependencies: + '@types/estree': 1.0.8 + + estree-util-build-jsx@3.0.1: + dependencies: + '@types/estree-jsx': 1.0.5 + devlop: 1.1.0 + estree-util-is-identifier-name: 3.0.0 + estree-walker: 3.0.3 + + estree-util-is-identifier-name@3.0.0: {} + + estree-util-scope@1.0.0: + dependencies: + '@types/estree': 1.0.8 + devlop: 1.1.0 + + estree-util-to-js@2.0.0: + dependencies: + '@types/estree-jsx': 1.0.5 + astring: 1.9.0 + source-map: 0.7.6 + + estree-util-visit@2.0.0: + dependencies: + '@types/estree-jsx': 1.0.5 + '@types/unist': 3.0.3 + + estree-walker@2.0.2: {} + + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.8 + + eventemitter3@5.0.4: {} + + expressive-code@0.41.6: + dependencies: + '@expressive-code/core': 0.41.6 + '@expressive-code/plugin-frames': 0.41.6 + '@expressive-code/plugin-shiki': 0.41.6 + '@expressive-code/plugin-text-markers': 0.41.6 + + extend@3.0.2: {} + + fast-deep-equal@3.1.3: {} + + 
fdir@6.5.0(picomatch@4.0.3): + optionalDependencies: + picomatch: 4.0.3 + + flattie@1.1.1: {} + + fontace@0.3.1: + dependencies: + '@types/fontkit': 2.0.8 + fontkit: 2.0.4 + + fontkit@2.0.4: + dependencies: + '@swc/helpers': 0.5.18 + brotli: 1.3.3 + clone: 2.1.2 + dfa: 1.2.0 + fast-deep-equal: 3.1.3 + restructure: 3.0.2 + tiny-inflate: 1.0.3 + unicode-properties: 1.4.1 + unicode-trie: 2.0.0 + + fsevents@2.3.3: + optional: true + + get-east-asian-width@1.4.0: {} + + github-slugger@2.0.0: {} + + h3@1.15.5: + dependencies: + cookie-es: 1.2.2 + crossws: 0.3.5 + defu: 6.1.4 + destr: 2.0.5 + iron-webcrypto: 1.2.1 + node-mock-http: 1.0.4 + radix3: 1.1.2 + ufo: 1.6.3 + uncrypto: 0.1.3 + + hast-util-embedded@3.0.0: + dependencies: + '@types/hast': 3.0.4 + hast-util-is-element: 3.0.0 + + hast-util-format@1.1.0: + dependencies: + '@types/hast': 3.0.4 + hast-util-embedded: 3.0.0 + hast-util-minify-whitespace: 1.0.1 + hast-util-phrasing: 3.0.1 + hast-util-whitespace: 3.0.0 + html-whitespace-sensitive-tag-names: 3.0.1 + unist-util-visit-parents: 6.0.2 + + hast-util-from-html@2.0.3: + dependencies: + '@types/hast': 3.0.4 + devlop: 1.1.0 + hast-util-from-parse5: 8.0.3 + parse5: 7.3.0 + vfile: 6.0.3 + vfile-message: 4.0.3 + + hast-util-from-parse5@8.0.3: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + devlop: 1.1.0 + hastscript: 9.0.1 + property-information: 7.1.0 + vfile: 6.0.3 + vfile-location: 5.0.3 + web-namespaces: 2.0.1 + + hast-util-has-property@3.0.0: + dependencies: + '@types/hast': 3.0.4 + + hast-util-is-body-ok-link@3.0.1: + dependencies: + '@types/hast': 3.0.4 + + hast-util-is-element@3.0.0: + dependencies: + '@types/hast': 3.0.4 + + hast-util-minify-whitespace@1.0.1: + dependencies: + '@types/hast': 3.0.4 + hast-util-embedded: 3.0.0 + hast-util-is-element: 3.0.0 + hast-util-whitespace: 3.0.0 + unist-util-is: 6.0.1 + + hast-util-parse-selector@4.0.0: + dependencies: + '@types/hast': 3.0.4 + + hast-util-phrasing@3.0.1: + dependencies: + '@types/hast': 
3.0.4 + hast-util-embedded: 3.0.0 + hast-util-has-property: 3.0.0 + hast-util-is-body-ok-link: 3.0.1 + hast-util-is-element: 3.0.0 + + hast-util-raw@9.1.0: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + '@ungap/structured-clone': 1.3.0 + hast-util-from-parse5: 8.0.3 + hast-util-to-parse5: 8.0.1 + html-void-elements: 3.0.0 + mdast-util-to-hast: 13.2.1 + parse5: 7.3.0 + unist-util-position: 5.0.0 + unist-util-visit: 5.1.0 + vfile: 6.0.3 + web-namespaces: 2.0.1 + zwitch: 2.0.4 + + hast-util-select@6.0.4: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + bcp-47-match: 2.0.3 + comma-separated-tokens: 2.0.3 + css-selector-parser: 3.3.0 + devlop: 1.1.0 + direction: 2.0.1 + hast-util-has-property: 3.0.0 + hast-util-to-string: 3.0.1 + hast-util-whitespace: 3.0.0 + nth-check: 2.1.1 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + unist-util-visit: 5.1.0 + zwitch: 2.0.4 + + hast-util-to-estree@3.1.3: + dependencies: + '@types/estree': 1.0.8 + '@types/estree-jsx': 1.0.5 + '@types/hast': 3.0.4 + comma-separated-tokens: 2.0.3 + devlop: 1.1.0 + estree-util-attach-comments: 3.0.0 + estree-util-is-identifier-name: 3.0.0 + hast-util-whitespace: 3.0.0 + mdast-util-mdx-expression: 2.0.1 + mdast-util-mdx-jsx: 3.2.0 + mdast-util-mdxjs-esm: 2.0.1 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + style-to-js: 1.1.21 + unist-util-position: 5.0.0 + zwitch: 2.0.4 + transitivePeerDependencies: + - supports-color + + hast-util-to-html@9.0.5: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + ccount: 2.0.1 + comma-separated-tokens: 2.0.3 + hast-util-whitespace: 3.0.0 + html-void-elements: 3.0.0 + mdast-util-to-hast: 13.2.1 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + stringify-entities: 4.0.4 + zwitch: 2.0.4 + + hast-util-to-jsx-runtime@2.3.6: + dependencies: + '@types/estree': 1.0.8 + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + comma-separated-tokens: 2.0.3 + devlop: 1.1.0 + 
estree-util-is-identifier-name: 3.0.0 + hast-util-whitespace: 3.0.0 + mdast-util-mdx-expression: 2.0.1 + mdast-util-mdx-jsx: 3.2.0 + mdast-util-mdxjs-esm: 2.0.1 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + style-to-js: 1.1.21 + unist-util-position: 5.0.0 + vfile-message: 4.0.3 + transitivePeerDependencies: + - supports-color + + hast-util-to-parse5@8.0.1: + dependencies: + '@types/hast': 3.0.4 + comma-separated-tokens: 2.0.3 + devlop: 1.1.0 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + web-namespaces: 2.0.1 + zwitch: 2.0.4 + + hast-util-to-string@3.0.1: + dependencies: + '@types/hast': 3.0.4 + + hast-util-to-text@4.0.2: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + hast-util-is-element: 3.0.0 + unist-util-find-after: 5.0.0 + + hast-util-whitespace@3.0.0: + dependencies: + '@types/hast': 3.0.4 + + hastscript@9.0.1: + dependencies: + '@types/hast': 3.0.4 + comma-separated-tokens: 2.0.3 + hast-util-parse-selector: 4.0.0 + property-information: 7.1.0 + space-separated-tokens: 2.0.2 + + html-escaper@3.0.3: {} + + html-void-elements@3.0.0: {} + + html-whitespace-sensitive-tag-names@3.0.1: {} + + http-cache-semantics@4.2.0: {} + + i18next@23.16.8: + dependencies: + '@babel/runtime': 7.28.6 + + import-meta-resolve@4.2.0: {} + + inline-style-parser@0.2.7: {} + + iron-webcrypto@1.2.1: {} + + is-alphabetical@2.0.1: {} + + is-alphanumerical@2.0.1: + dependencies: + is-alphabetical: 2.0.1 + is-decimal: 2.0.1 + + is-arrayish@0.3.4: {} + + is-decimal@2.0.1: {} + + is-docker@3.0.0: {} + + is-fullwidth-code-point@3.0.0: {} + + is-hexadecimal@2.0.1: {} + + is-inside-container@1.0.0: + dependencies: + is-docker: 3.0.0 + + is-plain-obj@4.1.0: {} + + is-wsl@3.1.0: + dependencies: + is-inside-container: 1.0.0 + + js-yaml@4.1.1: + dependencies: + argparse: 2.0.1 + + kleur@3.0.3: {} + + kleur@4.1.5: {} + + klona@2.0.6: {} + + longest-streak@3.1.0: {} + + lru-cache@11.2.6: {} + + magic-string@0.30.21: + dependencies: + 
'@jridgewell/sourcemap-codec': 1.5.5 + + magicast@0.3.5: + dependencies: + '@babel/parser': 7.29.0 + '@babel/types': 7.29.0 + source-map-js: 1.2.1 + + markdown-extensions@2.0.0: {} + + markdown-table@3.0.4: {} + + mdast-util-definitions@6.0.0: + dependencies: + '@types/mdast': 4.0.4 + '@types/unist': 3.0.3 + unist-util-visit: 5.1.0 + + mdast-util-directive@3.1.0: + dependencies: + '@types/mdast': 4.0.4 + '@types/unist': 3.0.3 + ccount: 2.0.1 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + parse-entities: 4.0.2 + stringify-entities: 4.0.4 + unist-util-visit-parents: 6.0.2 + transitivePeerDependencies: + - supports-color + + mdast-util-find-and-replace@3.0.2: + dependencies: + '@types/mdast': 4.0.4 + escape-string-regexp: 5.0.0 + unist-util-is: 6.0.1 + unist-util-visit-parents: 6.0.2 + + mdast-util-from-markdown@2.0.2: + dependencies: + '@types/mdast': 4.0.4 + '@types/unist': 3.0.3 + decode-named-character-reference: 1.3.0 + devlop: 1.1.0 + mdast-util-to-string: 4.0.0 + micromark: 4.0.2 + micromark-util-decode-numeric-character-reference: 2.0.2 + micromark-util-decode-string: 2.0.1 + micromark-util-normalize-identifier: 2.0.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + unist-util-stringify-position: 4.0.0 + transitivePeerDependencies: + - supports-color + + mdast-util-gfm-autolink-literal@2.0.1: + dependencies: + '@types/mdast': 4.0.4 + ccount: 2.0.1 + devlop: 1.1.0 + mdast-util-find-and-replace: 3.0.2 + micromark-util-character: 2.1.1 + + mdast-util-gfm-footnote@2.1.0: + dependencies: + '@types/mdast': 4.0.4 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + micromark-util-normalize-identifier: 2.0.1 + transitivePeerDependencies: + - supports-color + + mdast-util-gfm-strikethrough@2.0.0: + dependencies: + '@types/mdast': 4.0.4 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-gfm-table@2.0.0: + 
dependencies: + '@types/mdast': 4.0.4 + devlop: 1.1.0 + markdown-table: 3.0.4 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-gfm-task-list-item@2.0.0: + dependencies: + '@types/mdast': 4.0.4 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-gfm@3.1.0: + dependencies: + mdast-util-from-markdown: 2.0.2 + mdast-util-gfm-autolink-literal: 2.0.1 + mdast-util-gfm-footnote: 2.1.0 + mdast-util-gfm-strikethrough: 2.0.0 + mdast-util-gfm-table: 2.0.0 + mdast-util-gfm-task-list-item: 2.0.0 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-mdx-expression@2.0.1: + dependencies: + '@types/estree-jsx': 1.0.5 + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-mdx-jsx@3.2.0: + dependencies: + '@types/estree-jsx': 1.0.5 + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + '@types/unist': 3.0.3 + ccount: 2.0.1 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + parse-entities: 4.0.2 + stringify-entities: 4.0.4 + unist-util-stringify-position: 4.0.0 + vfile-message: 4.0.3 + transitivePeerDependencies: + - supports-color + + mdast-util-mdx@3.0.0: + dependencies: + mdast-util-from-markdown: 2.0.2 + mdast-util-mdx-expression: 2.0.1 + mdast-util-mdx-jsx: 3.2.0 + mdast-util-mdxjs-esm: 2.0.1 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-mdxjs-esm@2.0.1: + dependencies: + '@types/estree-jsx': 1.0.5 + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + devlop: 1.1.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + transitivePeerDependencies: + - supports-color + + mdast-util-phrasing@4.1.0: + dependencies: + '@types/mdast': 4.0.4 + 
unist-util-is: 6.0.1 + + mdast-util-to-hast@13.2.1: + dependencies: + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + '@ungap/structured-clone': 1.3.0 + devlop: 1.1.0 + micromark-util-sanitize-uri: 2.0.1 + trim-lines: 3.0.1 + unist-util-position: 5.0.0 + unist-util-visit: 5.1.0 + vfile: 6.0.3 + + mdast-util-to-markdown@2.1.2: + dependencies: + '@types/mdast': 4.0.4 + '@types/unist': 3.0.3 + longest-streak: 3.1.0 + mdast-util-phrasing: 4.1.0 + mdast-util-to-string: 4.0.0 + micromark-util-classify-character: 2.0.1 + micromark-util-decode-string: 2.0.1 + unist-util-visit: 5.1.0 + zwitch: 2.0.4 + + mdast-util-to-string@4.0.0: + dependencies: + '@types/mdast': 4.0.4 + + mdn-data@2.12.2: {} + + micromark-core-commonmark@2.0.3: + dependencies: + decode-named-character-reference: 1.3.0 + devlop: 1.1.0 + micromark-factory-destination: 2.0.1 + micromark-factory-label: 2.0.1 + micromark-factory-space: 2.0.1 + micromark-factory-title: 2.0.1 + micromark-factory-whitespace: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-chunked: 2.0.1 + micromark-util-classify-character: 2.0.1 + micromark-util-html-tag-name: 2.0.1 + micromark-util-normalize-identifier: 2.0.1 + micromark-util-resolve-all: 2.0.1 + micromark-util-subtokenize: 2.1.0 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-directive@3.0.2: + dependencies: + devlop: 1.1.0 + micromark-factory-space: 2.0.1 + micromark-factory-whitespace: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + parse-entities: 4.0.2 + + micromark-extension-gfm-autolink-literal@2.1.0: + dependencies: + micromark-util-character: 2.1.1 + micromark-util-sanitize-uri: 2.0.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-gfm-footnote@2.1.0: + dependencies: + devlop: 1.1.0 + micromark-core-commonmark: 2.0.3 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-normalize-identifier: 2.0.1 + 
micromark-util-sanitize-uri: 2.0.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-gfm-strikethrough@2.1.0: + dependencies: + devlop: 1.1.0 + micromark-util-chunked: 2.0.1 + micromark-util-classify-character: 2.0.1 + micromark-util-resolve-all: 2.0.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-gfm-table@2.1.1: + dependencies: + devlop: 1.1.0 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-gfm-tagfilter@2.0.0: + dependencies: + micromark-util-types: 2.0.2 + + micromark-extension-gfm-task-list-item@2.1.0: + dependencies: + devlop: 1.1.0 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-gfm@3.0.0: + dependencies: + micromark-extension-gfm-autolink-literal: 2.1.0 + micromark-extension-gfm-footnote: 2.1.0 + micromark-extension-gfm-strikethrough: 2.1.0 + micromark-extension-gfm-table: 2.1.1 + micromark-extension-gfm-tagfilter: 2.0.0 + micromark-extension-gfm-task-list-item: 2.1.0 + micromark-util-combine-extensions: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-mdx-expression@3.0.1: + dependencies: + '@types/estree': 1.0.8 + devlop: 1.1.0 + micromark-factory-mdx-expression: 2.0.3 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-events-to-acorn: 2.0.3 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-extension-mdx-jsx@3.0.2: + dependencies: + '@types/estree': 1.0.8 + devlop: 1.1.0 + estree-util-is-identifier-name: 3.0.0 + micromark-factory-mdx-expression: 2.0.3 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-events-to-acorn: 2.0.3 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + vfile-message: 4.0.3 + + micromark-extension-mdx-md@2.0.0: + dependencies: + 
micromark-util-types: 2.0.2 + + micromark-extension-mdxjs-esm@3.0.0: + dependencies: + '@types/estree': 1.0.8 + devlop: 1.1.0 + micromark-core-commonmark: 2.0.3 + micromark-util-character: 2.1.1 + micromark-util-events-to-acorn: 2.0.3 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + unist-util-position-from-estree: 2.0.0 + vfile-message: 4.0.3 + + micromark-extension-mdxjs@3.0.0: + dependencies: + acorn: 8.15.0 + acorn-jsx: 5.3.2(acorn@8.15.0) + micromark-extension-mdx-expression: 3.0.1 + micromark-extension-mdx-jsx: 3.0.2 + micromark-extension-mdx-md: 2.0.0 + micromark-extension-mdxjs-esm: 3.0.0 + micromark-util-combine-extensions: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-factory-destination@2.0.1: + dependencies: + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-factory-label@2.0.1: + dependencies: + devlop: 1.1.0 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-factory-mdx-expression@2.0.3: + dependencies: + '@types/estree': 1.0.8 + devlop: 1.1.0 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-events-to-acorn: 2.0.3 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + unist-util-position-from-estree: 2.0.0 + vfile-message: 4.0.3 + + micromark-factory-space@2.0.1: + dependencies: + micromark-util-character: 2.1.1 + micromark-util-types: 2.0.2 + + micromark-factory-title@2.0.1: + dependencies: + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-factory-whitespace@2.0.1: + dependencies: + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-util-character@2.1.1: + dependencies: + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-util-chunked@2.0.1: + dependencies: + 
micromark-util-symbol: 2.0.1 + + micromark-util-classify-character@2.0.1: + dependencies: + micromark-util-character: 2.1.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-util-combine-extensions@2.0.1: + dependencies: + micromark-util-chunked: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-util-decode-numeric-character-reference@2.0.2: + dependencies: + micromark-util-symbol: 2.0.1 + + micromark-util-decode-string@2.0.1: + dependencies: + decode-named-character-reference: 1.3.0 + micromark-util-character: 2.1.1 + micromark-util-decode-numeric-character-reference: 2.0.2 + micromark-util-symbol: 2.0.1 + + micromark-util-encode@2.0.1: {} + + micromark-util-events-to-acorn@2.0.3: + dependencies: + '@types/estree': 1.0.8 + '@types/unist': 3.0.3 + devlop: 1.1.0 + estree-util-visit: 2.0.0 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + vfile-message: 4.0.3 + + micromark-util-html-tag-name@2.0.1: {} + + micromark-util-normalize-identifier@2.0.1: + dependencies: + micromark-util-symbol: 2.0.1 + + micromark-util-resolve-all@2.0.1: + dependencies: + micromark-util-types: 2.0.2 + + micromark-util-sanitize-uri@2.0.1: + dependencies: + micromark-util-character: 2.1.1 + micromark-util-encode: 2.0.1 + micromark-util-symbol: 2.0.1 + + micromark-util-subtokenize@2.1.0: + dependencies: + devlop: 1.1.0 + micromark-util-chunked: 2.0.1 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + + micromark-util-symbol@2.0.1: {} + + micromark-util-types@2.0.2: {} + + micromark@4.0.2: + dependencies: + '@types/debug': 4.1.12 + debug: 4.4.3 + decode-named-character-reference: 1.3.0 + devlop: 1.1.0 + micromark-core-commonmark: 2.0.3 + micromark-factory-space: 2.0.1 + micromark-util-character: 2.1.1 + micromark-util-chunked: 2.0.1 + micromark-util-combine-extensions: 2.0.1 + micromark-util-decode-numeric-character-reference: 2.0.2 + micromark-util-encode: 2.0.1 + micromark-util-normalize-identifier: 2.0.1 + micromark-util-resolve-all: 2.0.1 + 
micromark-util-sanitize-uri: 2.0.1 + micromark-util-subtokenize: 2.1.0 + micromark-util-symbol: 2.0.1 + micromark-util-types: 2.0.2 + transitivePeerDependencies: + - supports-color + + mrmime@2.0.1: {} + + ms@2.1.3: {} + + nanoid@3.3.11: {} + + neotraverse@0.6.18: {} + + nlcst-to-string@4.0.0: + dependencies: + '@types/nlcst': 2.0.3 + + node-fetch-native@1.6.7: {} + + node-fetch@2.7.0: + dependencies: + whatwg-url: 5.0.0 + + node-mock-http@1.0.4: {} + + normalize-path@3.0.0: {} + + nth-check@2.1.1: + dependencies: + boolbase: 1.0.0 + + ofetch@1.5.1: + dependencies: + destr: 2.0.5 + node-fetch-native: 1.6.7 + ufo: 1.6.3 + + ohash@2.0.11: {} + + oniguruma-parser@0.12.1: {} + + oniguruma-to-es@4.3.4: + dependencies: + oniguruma-parser: 0.12.1 + regex: 6.1.0 + regex-recursion: 6.0.2 + + p-limit@6.2.0: + dependencies: + yocto-queue: 1.2.2 + + p-queue@8.1.1: + dependencies: + eventemitter3: 5.0.4 + p-timeout: 6.1.4 + + p-timeout@6.1.4: {} + + package-manager-detector@1.6.0: {} + + pagefind@1.4.0: + optionalDependencies: + '@pagefind/darwin-arm64': 1.4.0 + '@pagefind/darwin-x64': 1.4.0 + '@pagefind/freebsd-x64': 1.4.0 + '@pagefind/linux-arm64': 1.4.0 + '@pagefind/linux-x64': 1.4.0 + '@pagefind/windows-x64': 1.4.0 + + pako@0.2.9: {} + + parse-entities@4.0.2: + dependencies: + '@types/unist': 2.0.11 + character-entities-legacy: 3.0.0 + character-reference-invalid: 2.0.1 + decode-named-character-reference: 1.3.0 + is-alphanumerical: 2.0.1 + is-decimal: 2.0.1 + is-hexadecimal: 2.0.1 + + parse-latin@7.0.0: + dependencies: + '@types/nlcst': 2.0.3 + '@types/unist': 3.0.3 + nlcst-to-string: 4.0.0 + unist-util-modify-children: 4.0.0 + unist-util-visit-children: 3.0.0 + vfile: 6.0.3 + + parse5@7.3.0: + dependencies: + entities: 6.0.1 + + piccolore@0.1.3: {} + + picocolors@1.1.1: {} + + picomatch@2.3.1: {} + + picomatch@4.0.3: {} + + postcss-nested@6.2.0(postcss@8.5.6): + dependencies: + postcss: 8.5.6 + postcss-selector-parser: 6.1.2 + + postcss-selector-parser@6.1.2: + 
dependencies: + cssesc: 3.0.0 + util-deprecate: 1.0.2 + + postcss@8.5.6: + dependencies: + nanoid: 3.3.11 + picocolors: 1.1.1 + source-map-js: 1.2.1 + + prismjs@1.30.0: {} + + prompts@2.4.2: + dependencies: + kleur: 3.0.3 + sisteransi: 1.0.5 + + property-information@7.1.0: {} + + radix3@1.1.2: {} + + readdirp@5.0.0: {} + + recma-build-jsx@1.0.0: + dependencies: + '@types/estree': 1.0.8 + estree-util-build-jsx: 3.0.1 + vfile: 6.0.3 + + recma-jsx@1.0.1(acorn@8.15.0): + dependencies: + acorn: 8.15.0 + acorn-jsx: 5.3.2(acorn@8.15.0) + estree-util-to-js: 2.0.0 + recma-parse: 1.0.0 + recma-stringify: 1.0.0 + unified: 11.0.5 + + recma-parse@1.0.0: + dependencies: + '@types/estree': 1.0.8 + esast-util-from-js: 2.0.1 + unified: 11.0.5 + vfile: 6.0.3 + + recma-stringify@1.0.0: + dependencies: + '@types/estree': 1.0.8 + estree-util-to-js: 2.0.0 + unified: 11.0.5 + vfile: 6.0.3 + + regex-recursion@6.0.2: + dependencies: + regex-utilities: 2.3.0 + + regex-utilities@2.3.0: {} + + regex@6.1.0: + dependencies: + regex-utilities: 2.3.0 + + rehype-expressive-code@0.41.6: + dependencies: + expressive-code: 0.41.6 + + rehype-format@5.0.1: + dependencies: + '@types/hast': 3.0.4 + hast-util-format: 1.1.0 + + rehype-parse@9.0.1: + dependencies: + '@types/hast': 3.0.4 + hast-util-from-html: 2.0.3 + unified: 11.0.5 + + rehype-raw@7.0.0: + dependencies: + '@types/hast': 3.0.4 + hast-util-raw: 9.1.0 + vfile: 6.0.3 + + rehype-recma@1.0.0: + dependencies: + '@types/estree': 1.0.8 + '@types/hast': 3.0.4 + hast-util-to-estree: 3.1.3 + transitivePeerDependencies: + - supports-color + + rehype-stringify@10.0.1: + dependencies: + '@types/hast': 3.0.4 + hast-util-to-html: 9.0.5 + unified: 11.0.5 + + rehype@13.0.2: + dependencies: + '@types/hast': 3.0.4 + rehype-parse: 9.0.1 + rehype-stringify: 10.0.1 + unified: 11.0.5 + + remark-directive@3.0.1: + dependencies: + '@types/mdast': 4.0.4 + mdast-util-directive: 3.1.0 + micromark-extension-directive: 3.0.2 + unified: 11.0.5 + transitivePeerDependencies: 
+ - supports-color + + remark-gfm@4.0.1: + dependencies: + '@types/mdast': 4.0.4 + mdast-util-gfm: 3.1.0 + micromark-extension-gfm: 3.0.0 + remark-parse: 11.0.0 + remark-stringify: 11.0.0 + unified: 11.0.5 + transitivePeerDependencies: + - supports-color + + remark-mdx@3.1.1: + dependencies: + mdast-util-mdx: 3.0.0 + micromark-extension-mdxjs: 3.0.0 + transitivePeerDependencies: + - supports-color + + remark-parse@11.0.0: + dependencies: + '@types/mdast': 4.0.4 + mdast-util-from-markdown: 2.0.2 + micromark-util-types: 2.0.2 + unified: 11.0.5 + transitivePeerDependencies: + - supports-color + + remark-rehype@11.1.2: + dependencies: + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + mdast-util-to-hast: 13.2.1 + unified: 11.0.5 + vfile: 6.0.3 + + remark-smartypants@3.0.2: + dependencies: + retext: 9.0.0 + retext-smartypants: 6.2.0 + unified: 11.0.5 + unist-util-visit: 5.1.0 + + remark-stringify@11.0.0: + dependencies: + '@types/mdast': 4.0.4 + mdast-util-to-markdown: 2.1.2 + unified: 11.0.5 + + restructure@3.0.2: {} + + retext-latin@4.0.0: + dependencies: + '@types/nlcst': 2.0.3 + parse-latin: 7.0.0 + unified: 11.0.5 + + retext-smartypants@6.2.0: + dependencies: + '@types/nlcst': 2.0.3 + nlcst-to-string: 4.0.0 + unist-util-visit: 5.1.0 + + retext-stringify@4.0.0: + dependencies: + '@types/nlcst': 2.0.3 + nlcst-to-string: 4.0.0 + unified: 11.0.5 + + retext@9.0.0: + dependencies: + '@types/nlcst': 2.0.3 + retext-latin: 4.0.0 + retext-stringify: 4.0.0 + unified: 11.0.5 + + rollup@4.57.1: + dependencies: + '@types/estree': 1.0.8 + optionalDependencies: + '@rollup/rollup-android-arm-eabi': 4.57.1 + '@rollup/rollup-android-arm64': 4.57.1 + '@rollup/rollup-darwin-arm64': 4.57.1 + '@rollup/rollup-darwin-x64': 4.57.1 + '@rollup/rollup-freebsd-arm64': 4.57.1 + '@rollup/rollup-freebsd-x64': 4.57.1 + '@rollup/rollup-linux-arm-gnueabihf': 4.57.1 + '@rollup/rollup-linux-arm-musleabihf': 4.57.1 + '@rollup/rollup-linux-arm64-gnu': 4.57.1 + '@rollup/rollup-linux-arm64-musl': 4.57.1 + 
'@rollup/rollup-linux-loong64-gnu': 4.57.1 + '@rollup/rollup-linux-loong64-musl': 4.57.1 + '@rollup/rollup-linux-ppc64-gnu': 4.57.1 + '@rollup/rollup-linux-ppc64-musl': 4.57.1 + '@rollup/rollup-linux-riscv64-gnu': 4.57.1 + '@rollup/rollup-linux-riscv64-musl': 4.57.1 + '@rollup/rollup-linux-s390x-gnu': 4.57.1 + '@rollup/rollup-linux-x64-gnu': 4.57.1 + '@rollup/rollup-linux-x64-musl': 4.57.1 + '@rollup/rollup-openbsd-x64': 4.57.1 + '@rollup/rollup-openharmony-arm64': 4.57.1 + '@rollup/rollup-win32-arm64-msvc': 4.57.1 + '@rollup/rollup-win32-ia32-msvc': 4.57.1 + '@rollup/rollup-win32-x64-gnu': 4.57.1 + '@rollup/rollup-win32-x64-msvc': 4.57.1 + fsevents: 2.3.3 + + sax@1.4.4: {} + + semver@7.7.4: {} + + sharp@0.34.2: + dependencies: + color: 4.2.3 + detect-libc: 2.1.2 + semver: 7.7.4 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.2 + '@img/sharp-darwin-x64': 0.34.2 + '@img/sharp-libvips-darwin-arm64': 1.1.0 + '@img/sharp-libvips-darwin-x64': 1.1.0 + '@img/sharp-libvips-linux-arm': 1.1.0 + '@img/sharp-libvips-linux-arm64': 1.1.0 + '@img/sharp-libvips-linux-ppc64': 1.1.0 + '@img/sharp-libvips-linux-s390x': 1.1.0 + '@img/sharp-libvips-linux-x64': 1.1.0 + '@img/sharp-libvips-linuxmusl-arm64': 1.1.0 + '@img/sharp-libvips-linuxmusl-x64': 1.1.0 + '@img/sharp-linux-arm': 0.34.2 + '@img/sharp-linux-arm64': 0.34.2 + '@img/sharp-linux-s390x': 0.34.2 + '@img/sharp-linux-x64': 0.34.2 + '@img/sharp-linuxmusl-arm64': 0.34.2 + '@img/sharp-linuxmusl-x64': 0.34.2 + '@img/sharp-wasm32': 0.34.2 + '@img/sharp-win32-arm64': 0.34.2 + '@img/sharp-win32-ia32': 0.34.2 + '@img/sharp-win32-x64': 0.34.2 + + shiki@3.22.0: + dependencies: + '@shikijs/core': 3.22.0 + '@shikijs/engine-javascript': 3.22.0 + '@shikijs/engine-oniguruma': 3.22.0 + '@shikijs/langs': 3.22.0 + '@shikijs/themes': 3.22.0 + '@shikijs/types': 3.22.0 + '@shikijs/vscode-textmate': 10.0.2 + '@types/hast': 3.0.4 + + simple-swizzle@0.2.4: + dependencies: + is-arrayish: 0.3.4 + + sisteransi@1.0.5: {} + + sitemap@8.0.2: + 
dependencies: + '@types/node': 17.0.45 + '@types/sax': 1.2.7 + arg: 5.0.2 + sax: 1.4.4 + + smol-toml@1.6.0: {} + + source-map-js@1.2.1: {} + + source-map@0.7.6: {} + + space-separated-tokens@2.0.2: {} + + starlight-changelogs@0.1.1(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)))(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)): + dependencies: + '@ascorbic/loader-utils': 1.0.2(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + '@astrojs/starlight': 0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + github-slugger: 2.0.0 + mdast-util-from-markdown: 2.0.2 + mdast-util-to-markdown: 2.1.2 + mdast-util-to-string: 4.0.0 + unist-util-visit: 5.1.0 + transitivePeerDependencies: + - astro + - supports-color + + starlight-github-alerts@0.1.0(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))): + dependencies: + '@astrojs/starlight': 0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + unist-util-visit: 5.1.0 + + starlight-scroll-to-top@0.3.1(@astrojs/starlight@0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3))): + dependencies: + '@astrojs/starlight': 0.36.0(astro@5.14.1(@types/node@25.2.3)(rollup@4.57.1)(typescript@5.9.3)) + + stream-replace-string@2.0.0: {} + + string-width@4.2.3: + dependencies: + emoji-regex: 8.0.0 + is-fullwidth-code-point: 3.0.0 + strip-ansi: 6.0.1 + + string-width@7.2.0: + dependencies: + emoji-regex: 10.6.0 + get-east-asian-width: 1.4.0 + strip-ansi: 7.1.2 + + stringify-entities@4.0.4: + dependencies: + character-entities-html4: 2.1.0 + character-entities-legacy: 3.0.0 + + strip-ansi@6.0.1: + dependencies: + ansi-regex: 5.0.1 + + strip-ansi@7.1.2: + dependencies: + ansi-regex: 6.2.2 + + style-to-js@1.1.21: + dependencies: + style-to-object: 1.0.14 + + style-to-object@1.0.14: + dependencies: + inline-style-parser: 0.2.7 + + tiny-inflate@1.0.3: {} + + 
tinyexec@0.3.2: {} + + tinyglobby@0.2.15: + dependencies: + fdir: 6.5.0(picomatch@4.0.3) + picomatch: 4.0.3 + + tr46@0.0.3: {} + + trim-lines@3.0.1: {} + + trough@2.2.0: {} + + tsconfck@3.1.6(typescript@5.9.3): + optionalDependencies: + typescript: 5.9.3 + + tslib@2.8.1: {} + + type-fest@4.41.0: {} + + typescript@5.9.3: {} + + ufo@1.6.3: {} + + ultrahtml@1.6.0: {} + + uncrypto@0.1.3: {} + + undici-types@7.16.0: {} + + unicode-properties@1.4.1: + dependencies: + base64-js: 1.5.1 + unicode-trie: 2.0.0 + + unicode-trie@2.0.0: + dependencies: + pako: 0.2.9 + tiny-inflate: 1.0.3 + + unified@11.0.5: + dependencies: + '@types/unist': 3.0.3 + bail: 2.0.2 + devlop: 1.1.0 + extend: 3.0.2 + is-plain-obj: 4.1.0 + trough: 2.2.0 + vfile: 6.0.3 + + unifont@0.5.2: + dependencies: + css-tree: 3.1.0 + ofetch: 1.5.1 + ohash: 2.0.11 + + unist-util-find-after@5.0.0: + dependencies: + '@types/unist': 3.0.3 + unist-util-is: 6.0.1 + + unist-util-is@6.0.1: + dependencies: + '@types/unist': 3.0.3 + + unist-util-modify-children@4.0.0: + dependencies: + '@types/unist': 3.0.3 + array-iterate: 2.0.1 + + unist-util-position-from-estree@2.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-position@5.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-remove-position@5.0.0: + dependencies: + '@types/unist': 3.0.3 + unist-util-visit: 5.1.0 + + unist-util-stringify-position@4.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-visit-children@3.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-visit-parents@6.0.2: + dependencies: + '@types/unist': 3.0.3 + unist-util-is: 6.0.1 + + unist-util-visit@5.1.0: + dependencies: + '@types/unist': 3.0.3 + unist-util-is: 6.0.1 + unist-util-visit-parents: 6.0.2 + + unstorage@1.17.4: + dependencies: + anymatch: 3.1.3 + chokidar: 5.0.0 + destr: 2.0.5 + h3: 1.15.5 + lru-cache: 11.2.6 + node-fetch-native: 1.6.7 + ofetch: 1.5.1 + ufo: 1.6.3 + + util-deprecate@1.0.2: {} + + vfile-location@5.0.3: + dependencies: + '@types/unist': 3.0.3 
+ vfile: 6.0.3 + + vfile-message@4.0.3: + dependencies: + '@types/unist': 3.0.3 + unist-util-stringify-position: 4.0.0 + + vfile@6.0.3: + dependencies: + '@types/unist': 3.0.3 + vfile-message: 4.0.3 + + vite@6.4.1(@types/node@25.2.3): + dependencies: + esbuild: 0.25.12 + fdir: 6.5.0(picomatch@4.0.3) + picomatch: 4.0.3 + postcss: 8.5.6 + rollup: 4.57.1 + tinyglobby: 0.2.15 + optionalDependencies: + '@types/node': 25.2.3 + fsevents: 2.3.3 + + vitefu@1.1.1(vite@6.4.1(@types/node@25.2.3)): + optionalDependencies: + vite: 6.4.1(@types/node@25.2.3) + + web-namespaces@2.0.1: {} + + webidl-conversions@3.0.1: {} + + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + + which-pm-runs@1.1.0: {} + + widest-line@5.0.0: + dependencies: + string-width: 7.2.0 + + wrap-ansi@9.0.2: + dependencies: + ansi-styles: 6.2.3 + string-width: 7.2.0 + strip-ansi: 7.1.2 + + xxhash-wasm@1.1.0: {} + + yargs-parser@21.1.1: {} + + yocto-queue@1.2.2: {} + + yocto-spinner@0.2.3: + dependencies: + yoctocolors: 2.1.2 + + yoctocolors@2.1.2: {} + + zod-to-json-schema@3.25.1(zod@3.25.76): + dependencies: + zod: 3.25.76 + + zod-to-ts@1.2.0(typescript@5.9.3)(zod@3.25.76): + dependencies: + typescript: 5.9.3 + zod: 3.25.76 + + zod@3.25.76: {} + + zwitch@2.0.4: {} diff --git a/website/public/fairspec-logo.png b/website/public/fairspec-logo.png new file mode 100644 index 0000000..c41ddc1 Binary files /dev/null and b/website/public/fairspec-logo.png differ diff --git a/website/styles/general.css b/website/styles/general.css new file mode 100644 index 0000000..c72bba5 --- /dev/null +++ b/website/styles/general.css @@ -0,0 +1,49 @@ +:root { + --purple-hsl: 209, 60%, 60%; + --overlay-blurple: hsla(var(--purple-hsl), 0.2); + --scrollbar-color: #ddd; +} + +:root[data-theme="light"] { + --purple-hsl: 209, 85%, 65%; + --sl-color-text-accent: #2c7cfd; + --sl-color-banner-bg: #2c7cfd; +} + +:root[data-theme="dark"] { + --sl-color-text-accent: #2c7cfd; + --sl-color-banner-bg: #2c7cfd; +} + +/* 
Title */ + +.site-title { + font-size: var(--sl-text-xl); + gap: 0.75rem; +} + +* { + scrollbar-color: var(--scrollbar-color) transparent; + scrollbar-gutter: stable; +} + +/* Markdown heading links. */ + +.sl-markdown-content :is(h1, h2, h3, h4, h5, h6) > a { + color: var(--sl-color-white); + text-decoration: none; + &:hover { + text-decoration: underline; + } +} + +article.card { + border-radius: 15px; +} + +/* Scroll-to-top button */ + +#scroll-to-top-button { + accent-color: var(--sl-color-text-accent); + background-color: var(--sl-color-text-accent); +} diff --git a/website/tsconfig.json b/website/tsconfig.json new file mode 100644 index 0000000..8ae0575 --- /dev/null +++ b/website/tsconfig.json @@ -0,0 +1,5 @@ +{ + "extends": "astro/tsconfigs/strict", + "include": [".astro/types.d.ts", "**/*"], + "exclude": ["**/build/*"] +} diff --git a/website/wrangler.jsonc b/website/wrangler.jsonc new file mode 100644 index 0000000..296ee87 --- /dev/null +++ b/website/wrangler.jsonc @@ -0,0 +1,8 @@ +{ + "name": "fairspec-python", + "preview_urls": true, + "compatibility_date": "2025-08-15", + "assets": { + "directory": "./build" + } +}