Skip to content
Merged

v4.0 #83

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.9, "3.10", 3.11, 3.12]
python-version: ["3.10", 3.11, 3.12, 3.13, 3.14]

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
13 changes: 13 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changelog
=========

Unreleased
----------

- removed dependency on `attrs`

Backwards incompatibility
~~~~~~~~~~~~~~~~~~~~~~~~~

- Since classes formerly decorated with `@attr.s` are now dataclasses, any class inheriting from
them will be broken.
- Some functions have been moved to different modules, so imports may be broken.


Version 3.7.0
-------------

Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ This package provides
- a Python API to read and write relational, tabular data according to the [CSV on the Web](https://csvw.org/) specification and
- commandline tools for reading and validating CSVW data.

> [!IMPORTANT]
> The Python API provided by `csvw` 4.x is not fully backwards compatible with `csvw` < 4.
> See [CHANGES](CHANGES) for more information.


## Links

Expand All @@ -19,7 +23,7 @@ This package provides

## Installation

This package runs under Python >=3.8, use pip to install:
This package runs under Python >=3.9. Use pip to install it:

```bash
$ pip install csvw
Expand Down
13 changes: 13 additions & 0 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,24 @@ Releasing csvw
tox -r
```

- Run the integration test from the README:
```python
import json
from csvw import CSVW
data = CSVW('https://raw.githubusercontent.com/cldf/csvw/master/tests/fixtures/test.tsv')
print(json.dumps(data.to_json(minimal=True), indent=4))
```

- Make sure flake8 passes:
```shell
flake8 src
```

- Make sure pylint passes with a score of 10:
```shell
pylint src/csvw
```

- Make sure docs can be created:
```shell
cd docs
Expand Down
9 changes: 3 additions & 6 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ classifiers =
Natural Language :: English
Operating System :: OS Independent
Programming Language :: Python :: 3
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Programming Language :: Python :: 3.14
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
License :: OSI Approved :: Apache Software License
Expand All @@ -38,16 +38,14 @@ zip_safe = False
packages = find:
package_dir =
= src
python_requires = >=3.8
python_requires = >=3.9
install_requires =
attrs>=18.1
isodate
python-dateutil
# Pin until fix for 2.0.0 is released (https://pypi.org/project/rfc3986/#history):
rfc3986<2
uritemplate>=3.0.0
babel
requests
language-tags
rdflib
termcolor
Expand Down Expand Up @@ -76,7 +74,6 @@ test =
frictionless
pytest>=5
pytest-mock
requests-mock
pytest-cov
docs =
sphinx<7
Expand Down Expand Up @@ -112,7 +109,7 @@ show_missing = true
skip_covered = true

[tox:tox]
envlist = py38, py39, py310, py311, py312, py313
envlist = py39, py310, py311, py312, py313, py314
isolated_build = true
skip_missing_interpreter = true

Expand Down
4 changes: 3 additions & 1 deletion src/csvw/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# csvw - https://w3c.github.io/csvw/primer/
"""
csvw - https://w3c.github.io/csvw/primer/
"""

from .metadata import (
TableGroup, Table, Column, ForeignKey, Link, NaturalLanguage, Datatype, URITemplate, CSVW,
Expand Down
92 changes: 52 additions & 40 deletions src/csvw/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
CLI for the csvw package.
"""
import sys
import json
import shutil
Expand All @@ -9,10 +12,11 @@

from csvw import CSVW, TableGroup
from csvw.db import Database
from csvw.utils import metadata2markdown
from csvw.metadata_utils import metadata2markdown


def parsed_args(desc, args, *argspecs):
"""Add custom arguments to the parser and parse."""
if args is None: # pragma: no cover
parser = argparse.ArgumentParser(description=desc)
for kw, kwargs in argspecs:
Expand All @@ -21,23 +25,25 @@ def parsed_args(desc, args, *argspecs):
return args


def exit(ret, test=False):
def exit(ret, test=False): # pylint: disable=redefined-builtin
"""We don't want to exit the test suite"""
if test:
return ret
sys.exit(ret) # pragma: no cover


def csvwdescribe(args=None, test=False):
"""Describe a (set of) CSV file(s) with basic CSVW metadata."""
frictionless = shutil.which('frictionless')
if not frictionless: # pragma: no cover
raise ValueError('The frictionless command must be installed for this functionality!\n'
'Run `pip install frictionless` and try again.')

args = parsed_args(
"Describe a (set of) CSV file(s) with basic CSVW metadata.",
csvwdescribe.__doc__,
args,
(['--delimiter'], dict(default=None)),
(['csv'], dict(nargs='+', help="CSV files to describe as CSVW TableGroup")),
(['--delimiter'], {'default': None}),
(['csv'], {'nargs': '+', 'help': "CSV files to describe as CSVW TableGroup"}),
)
fargs = ['describe', '--json']
if args.delimiter:
Expand All @@ -53,19 +59,20 @@ def csvwdescribe(args=None, test=False):

dp = json.loads(subprocess.check_output([frictionless] + fargs + args.csv))
if onefile:
dp = dict(resources=[dp], profile='data-package')
dp = {'resources': [dp], 'profile': 'data-package'}

tg = TableGroup.from_frictionless_datapackage(dp)
print(json.dumps(tg.asdict(), indent=4))
return exit(0, test=test)
return exit(0, test=test) # pylint: disable=R1722


def csvwvalidate(args=None, test=False):
"""Validate a (set of) CSV file(s) described by CSVW metadata."""
args = parsed_args(
"Validate a (set of) CSV file(s) described by CSVW metadata.",
csvwvalidate.__doc__,
args,
(['url'], dict(help='URL or local path to CSV or JSON metadata file.')),
(['-v', '--verbose'], dict(action='store_true', default=False)),
(['url'], {'help': 'URL or local path to CSV or JSON metadata file.'}),
(['-v', '--verbose'], {'action': 'store_true', 'default': False}),
)
ret = 0
try:
Expand All @@ -83,80 +90,85 @@ def csvwvalidate(args=None, test=False):
print(colored('FAIL', 'red', attrs=['bold']))
if args.verbose:
print(colored(str(e), 'blue'))
return exit(ret, test=test)
return exit(ret, test=test) # pylint: disable=R1722


def csvw2datasette(args=None, test=False):
"""Convert CSVW to data for datasette (https://datasette.io/)."""
args = parsed_args(
"Convert CSVW to data for datasette (https://datasette.io/).",
csvw2datasette.__doc__,
args,
(['url'], dict(help='URL or local path to CSV or JSON metadata file.')),
(['-o', '--outdir'], dict(type=pathlib.Path, default=pathlib.Path('.'))),
(['url'], {'help': 'URL or local path to CSV or JSON metadata file.'}),
(['-o', '--outdir'], {'type': pathlib.Path, 'default': pathlib.Path('.')}),
)
dbname, mdname = 'datasette.db', 'datasette-metadata.json'
csvw = CSVW(args.url)
db = Database(csvw.tablegroup, fname=args.outdir / dbname)
db.write_from_tg()
md = {}
for k in ['title', 'description', 'license']:
if 'dc:{}'.format(k) in csvw.common_props:
md[k] = csvw.common_props['dc:{}'.format(k)]
# FIXME: flesh out, see https://docs.datasette.io/en/stable/metadata.html
if f'dc:{k}' in csvw.common_props:
md[k] = csvw.common_props[f'dc:{k}']
args.outdir.joinpath(mdname).write_text(json.dumps(md, indent=4))
print("""Run
datasette {} --metadata {}
and open your browser at
http://localhost:8001/
to browse the data.
""".format(args.outdir / dbname, args.outdir / mdname))
return exit(0, test=test)
for line in [
"Run",
f" datasette {args.outdir / dbname} --metadata {args.outdir / mdname}",
"and open your browser at",
" http://localhost:8001/",
"to browse the data.",
]:
print(line)
return exit(0, test=test) # pylint: disable=R1722


def csvw2json(args=None, test=False):
"""Convert CSVW to JSON, see https://w3c.github.io/csvw/csv2json/"""
args = parsed_args(
"Convert CSVW to JSON, see https://w3c.github.io/csvw/csv2json/",
csvw2json.__doc__,
args,
(['url'], dict(help='URL or local path to CSV or JSON metadata file.')),
(['url'], {'help': 'URL or local path to CSV or JSON metadata file.'}),
)
csvw = CSVW(args.url)
print(json.dumps(csvw.to_json(), indent=4))
return exit(0, test=test)
return exit(0, test=test) # pylint: disable=R1722


def csvw2sqlite(args=None, test=False): # pragma: no cover
def csvw2sqlite(args=None, test=False):
"""Convert CSVW to SQLite"""
args = parsed_args(
"Convert CSVW to JSON, see https://w3c.github.io/csvw/csv2json/",
csvw2sqlite.__doc__,
args,
(
['url'],
dict(help='URL or local path to CSVW metadata file describing a TableGroup.\n\n'
'Note that not all valid CSVW datasets can be converted to SQLite. One '
'limitation is that all tables which are referenced by foreign keys must '
'have a primary key.')),
{'help': 'URL or local path to CSVW metadata file describing a TableGroup.\n\n'
'Note that not all valid CSVW datasets can be converted to SQLite. One '
'limitation is that all tables which are referenced by foreign keys must '
'have a primary key.'}),
(
['output'],
dict(help='Path for the generated SQLite database file.')),
{'help': 'Path for the generated SQLite database file.'}),
)
tg = TableGroup.from_file(args.url)
db = Database(tg, args.output)
db.write_from_tg(_force=True)
return exit(0, test=test)
return exit(0, test=test) # pylint: disable=R1722


def csvw2markdown(args=None, test=False):
    """Create a Markdown document containing the CSVW metadata in human readable form."""
    # NOTE: The docstring above doubles as the command's description on the command line
    # (same convention as the other commands in this module), so it is kept to one line.
    #
    # args: pre-parsed arguments (anything with a `url` attribute), or None to parse the
    #       command line.
    # test: passed through to `exit`; when true, the return code is returned to the caller
    #       instead of terminating the process.
    args = parsed_args(
        # Previously this passed the description of the JSON converter (copy-paste error).
        csvw2markdown.__doc__,
        args,
        (
            ['url'],
            # The note about SQLite conversion limits belonged to csvw2sqlite and does not
            # apply to rendering metadata as Markdown.
            {'help': 'URL or local path to CSVW metadata file describing a TableGroup.'}),
    )
    tg = TableGroup.from_file(args.url)
    print(metadata2markdown(tg, link_files=True))
    return exit(0, test=test)  # pylint: disable=R1722


if __name__ == '__main__': # pragma: no cover
Expand Down
24 changes: 24 additions & 0 deletions src/csvw/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Functionality to address python compatibility issues.
"""
import re
import sys
import datetime


if (sys.version_info.major, sys.version_info.minor) >= (3, 11):  # pragma: no cover
    # From 3.11 on, the stdlib parser is used directly.
    fromisoformat = datetime.datetime.fromisoformat
else:
    def fromisoformat(s: str) -> datetime.datetime:  # pragma: no cover
        """Somewhat hacky backport of the more full-fledged date parsing support in py3.11.

        A trailing ``Z`` is rewritten to ``+00:00`` and a fractional-seconds part is cut out
        of the string before it is handed to ``datetime.datetime.fromisoformat``; the fraction
        is then re-applied to the parsed result as microseconds.

        :param s: ISO 8601 formatted date-time string.
        :return: The parsed ``datetime.datetime`` object.
        :raises ValueError: If the remaining string is rejected by the stdlib parser.
        """
        s = s.replace('Z', '+00:00')
        ms_p = re.compile(r'(?P<ms>\.[0-9]+)')
        m = ms_p.search(s)
        microsecond = None
        if m:
            s = ms_p.sub('', s)
            # Take the digits from the *match* (not from an unset local) and convert them
            # without going through float: pad or truncate to exactly six digits, so the
            # result is an exact int as required by `datetime.replace`.
            digits = m.group('ms')[1:]
            microsecond = int(digits[:6].ljust(6, '0'))
        res = datetime.datetime.fromisoformat(s)
        if microsecond:
            res = res.replace(microsecond=microsecond)
        return res
Loading