diff --git a/packages/linkml/src/linkml/generators/jsonldcontextgen.py b/packages/linkml/src/linkml/generators/jsonldcontextgen.py
index 60eaa9ffd..6b4c70e5b 100644
--- a/packages/linkml/src/linkml/generators/jsonldcontextgen.py
+++ b/packages/linkml/src/linkml/generators/jsonldcontextgen.py
@@ -56,8 +56,18 @@ class ContextGenerator(Generator):
     fix_multivalue_containers: bool | None = False
     exclude_imports: bool = False
     """If True, elements from imported schemas won't be included in the generated context"""
+    exclude_external_imports: bool = False
+    """If True, elements from URL-based external vocabulary imports are excluded.
+
+    Local file imports and linkml standard imports are kept. This is useful
+    when extending an external ontology (e.g. W3C Verifiable Credentials)
+    whose terms are ``@protected`` in their own JSON-LD context — redefining
+    them locally would violate JSON-LD 1.1 §4.1.11.
+    """
     _local_classes: set | None = field(default=None, repr=False)
     _local_slots: set | None = field(default=None, repr=False)
+    _external_classes: set | None = field(default=None, repr=False)
+    _external_slots: set | None = field(default=None, repr=False)
 
     # Framing (opt-in via CLI flag)
     emit_frame: bool = False
@@ -69,7 +79,7 @@ def __post_init__(self) -> None:
         super().__post_init__()
         if self.namespaces is None:
             raise TypeError("Schema text must be supplied to context generator. Preparsed schema will not work")
-        if self.exclude_imports:
+        if self.exclude_imports or self.exclude_external_imports:
             if self.schemaview:
                 sv = self.schemaview
             else:
@@ -77,8 +87,31 @@ def __post_init__(self) -> None:
                 if isinstance(source, str) and self.base_dir and not Path(source).is_absolute():
                     source = str(Path(self.base_dir) / source)
                 sv = SchemaView(source, importmap=self.importmap, base_dir=self.base_dir)
-            self._local_classes = set(sv.all_classes(imports=False).keys())
-            self._local_slots = set(sv.all_slots(imports=False).keys())
+            if self.exclude_imports:
+                self._local_classes = set(sv.all_classes(imports=False).keys())
+                self._local_slots = set(sv.all_slots(imports=False).keys())
+            if self.exclude_external_imports:
+                self._external_classes, self._external_slots = self._collect_external_elements(sv)
+
+    @staticmethod
+    def _collect_external_elements(sv: SchemaView) -> tuple[set[str], set[str]]:
+        """Collect class and slot names defined in URL-imported schemas.
+
+        Resolves the imports closure, then scans ``sv.schema_map`` and returns
+        ``(class_names, slot_names)`` for every schema whose import key starts
+        with ``http://`` or ``https://``. Local file imports and ``linkml:``
+        standard imports are left untouched.
+        """
+        sv.imports_closure()
+        external_classes: set[str] = set()
+        external_slots: set[str] = set()
+        for schema_key, schema_def in sv.schema_map.items():
+            if schema_key == sv.schema.name:  # never treat the root schema as external
+                continue
+            if schema_key.startswith(("http://", "https://")):
+                external_classes.update(schema_def.classes)
+                external_slots.update(schema_def.slots)
+        return external_classes, external_slots
 
     def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_):
         # Add any explicitly declared prefixes
@@ -194,6 +227,8 @@ def end_schema(
     def visit_class(self, cls: ClassDefinition) -> bool:
         if self.exclude_imports and cls.name not in self._local_classes:
             return False
+        if self.exclude_external_imports and cls.name in self._external_classes:
+            return False
 
         class_def = {}
         cn = camelcase(cls.name)
@@ -246,6 +281,8 @@ def _literal_coercion_for_ranges(self, ranges: list[str]) -> tuple[bool, str | N
     def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
         if self.exclude_imports and slot.name not in self._local_slots:
             return
+        if self.exclude_external_imports and slot.name in self._external_slots:
+            return
 
         if slot.identifier:
             slot_def = "@id"
@@ -390,6 +427,13 @@ def serialize(
     help="Use --exclude-imports to exclude imported elements from the generated JSON-LD context. This is useful when "
     "extending an ontology whose terms already have context definitions in their own JSON-LD context file.",
 )
+@click.option(
+    "--exclude-external-imports/--no-exclude-external-imports",
+    default=False,
+    show_default=True,
+    help="Exclude elements from URL-based external vocabulary imports while keeping local file imports. "
+    "Useful when extending ontologies (e.g. W3C VC v2) whose terms are @protected in their own JSON-LD context.",
+)
 @click.version_option(__version__, "-V", "--version")
 def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args):
     """Generate jsonld @context definition from LinkML model"""
diff --git a/tests/linkml/test_generators/test_jsonldcontextgen.py b/tests/linkml/test_generators/test_jsonldcontextgen.py
index 6de23347a..9e17b2462 100644
--- a/tests/linkml/test_generators/test_jsonldcontextgen.py
+++ b/tests/linkml/test_generators/test_jsonldcontextgen.py
@@ -1,4 +1,5 @@
 import json
+import textwrap
 
 import pytest
 from click.testing import CliRunner
@@ -571,3 +572,97 @@ def test_exclude_imports(input_path):
     # Imported class and slot must NOT be present
     assert "BaseClass" not in ctx, "Imported class 'BaseClass' must not appear in exclude-imports context"
     assert "baseProperty" not in ctx, "Imported slot 'baseProperty' must not appear in exclude-imports context"
+
+
+@pytest.mark.parametrize("mergeimports", [True, False], ids=["merge", "no-merge"])
+def test_exclude_external_imports(tmp_path, mergeimports):
+    """With --exclude-external-imports, elements from URL-based external
+    vocabulary imports must not appear in the generated JSON-LD context,
+    while local file imports and linkml standard imports are kept.
+
+    When a schema imports terms from an external vocabulary (e.g. W3C VC
+    v2), those terms already have context definitions in their own JSON-LD
+    context file. Re-defining them in the local context can conflict with
+    @protected term definitions from the external context (JSON-LD 1.1
+    section 4.1.11).
+    """
+    ext_dir = tmp_path / "ext"
+    ext_dir.mkdir()
+    (ext_dir / "external_vocab.yaml").write_text(
+        textwrap.dedent("""\
+            id: https://example.org/external-vocab
+            name: external_vocab
+            default_prefix: ext
+            prefixes:
+              linkml: https://w3id.org/linkml/
+              ext: https://example.org/external-vocab/
+            imports:
+              - linkml:types
+            slots:
+              issuer:
+                slot_uri: ext:issuer
+                range: string
+              validFrom:
+                slot_uri: ext:validFrom
+                range: date
+            classes:
+              ExternalCredential:
+                class_uri: ext:ExternalCredential
+                slots:
+                  - issuer
+                  - validFrom
+        """),
+        encoding="utf-8",
+    )
+
+    (tmp_path / "main.yaml").write_text(
+        textwrap.dedent("""\
+            id: https://example.org/main
+            name: main
+            default_prefix: main
+            prefixes:
+              linkml: https://w3id.org/linkml/
+              main: https://example.org/main/
+              ext: https://example.org/external-vocab/
+            imports:
+              - linkml:types
+              - https://example.org/external-vocab
+            slots:
+              localName:
+                slot_uri: main:localName
+                range: string
+            classes:
+              LocalThing:
+                class_uri: main:LocalThing
+                slots:
+                  - localName
+        """),
+        encoding="utf-8",
+    )
+
+    importmap = {"https://example.org/external-vocab": str(ext_dir / "external_vocab")}
+
+    context_text = ContextGenerator(
+        str(tmp_path / "main.yaml"),
+        exclude_external_imports=True,
+        mergeimports=mergeimports,
+        importmap=importmap,
+        base_dir=str(tmp_path),
+    ).serialize()
+    context = json.loads(context_text)
+    ctx = context["@context"]
+
+    # Local terms must be present
+    assert "localName" in ctx or "local_name" in ctx, (
+        f"Local slot missing with mergeimports={mergeimports}, got: {list(ctx.keys())}"
+    )
+    assert "LocalThing" in ctx, f"Local class missing with mergeimports={mergeimports}, got: {list(ctx.keys())}"
+
+    # External vocabulary terms must NOT be present
+    assert "issuer" not in ctx, f"External slot 'issuer' present with mergeimports={mergeimports}"
+    assert "validFrom" not in ctx and "valid_from" not in ctx, (
+        f"External slot 'validFrom' present with mergeimports={mergeimports}"
+    )
+    assert "ExternalCredential" not in ctx, (
+        f"External class 'ExternalCredential' present with mergeimports={mergeimports}"
+    )