Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from jinja2 import Environment, select_autoescape, FileSystemLoader

from pipeline.translator import PythonBuilder
from pipeline.utils import clone_sources, SchemaLoader, InstanceLoader
from pipeline.utils import (
clone_sources, SchemaLoader, InstanceLoader,
build_instance_id_to_ref, generate_directory_instance_files,
)


include_instances = True # to speed up the build during development, set this to False
Expand Down Expand Up @@ -52,6 +55,9 @@
print(f"Loaded instances ({perf_counter() - start_time} s)")

python_modules = defaultdict(list)
directories_with_patches = set() # (version_module, dir_path) pairs needing _instance_patches.py

env = Environment(loader=FileSystemLoader(os.path.dirname(os.path.realpath(__file__))), autoescape=select_autoescape())

for schema_version in schema_loader.get_schema_versions():

Expand All @@ -62,11 +68,12 @@
embedded = set()
linked = set()
class_to_module_map = {}
class_full_modules = {}
for schema_file_path in schemas_file_paths:
emb, lnk = PythonBuilder(schema_file_path, schema_loader.schemas_sources).get_edges()
class_to_module_map = PythonBuilder(
schema_file_path, schema_loader.schemas_sources
).update_class_to_module_map(class_to_module_map)
).update_class_to_module_map(class_to_module_map, class_full_modules)
embedded.update(emb)
linked.update(lnk)
conflicts = linked.intersection(embedded)
Expand All @@ -79,18 +86,43 @@
linked.remove(schema_identifier)

# Step 4b - translate and build each openMINDS schema as a Python class
instance_data_by_dir = defaultdict(list) # dir_path → [(class_name, full_module_path, instances_raw)]

for schema_file_path in schemas_file_paths:
module_path, class_name = PythonBuilder(
builder = PythonBuilder(
schema_file_path,
schema_loader.schemas_sources,
instances=instances.get(schema_version, None),
additional_methods=additional_methods,
).build(embedded=embedded, class_to_module_map=class_to_module_map)
)
module_path, class_name = builder.build(
embedded=embedded, class_to_module_map=class_to_module_map, class_full_modules=class_full_modules
)

if builder.context["instances_raw"]:
dir_parts = builder.relative_path_without_extension[:-1]
dir_path = "/".join(dir_parts)
instance_data_by_dir[dir_path].append(
(class_name, class_full_modules[class_name], builder.context["instances_raw"])
)

parts = module_path.split(".")
parent_path = ".".join(parts[:-1])
python_modules[parent_path].append((parts[-1], class_name))

# Step 4c - generate the instance files (*_instances.py, plus _instance_patches.py where needed) for each leaf directory that has instances
version_module = schema_version.split(".")[0]
all_instances_for_version = instances.get(schema_version, {})
id_to_ref = build_instance_id_to_ref(all_instances_for_version)

for dir_path, dir_class_data in instance_data_by_dir.items():
has_patches = generate_directory_instance_files(
version_module, dir_path, dir_class_data,
all_instances_for_version, id_to_ref, class_full_modules, env
)
if has_patches:
directories_with_patches.add((version_module, dir_path))

print(f"Processed schemas ({perf_counter() - start_time} s)")


Expand All @@ -104,6 +136,9 @@
with open(init_file_path, "w") as fp:
for class_module, class_name in sorted(classes):
fp.write(f"from .{class_module} import {class_name}\n")
rel_dir_path = "/".join(dir_path[3:])
if (dir_path[2], rel_dir_path) in directories_with_patches:
fp.write("from . import _instance_patches as _ # noqa: F401\n")
while len(dir_path) > 3:
child_dir = dir_path[-1]
dir_path = dir_path[:-1]
Expand All @@ -118,7 +153,6 @@
with open(init_file_path, "w") as fp:
fp.write(f"from . import ({', '.join(sorted(module_list))})\n")

env = Environment(loader=FileSystemLoader(os.path.dirname(os.path.realpath(__file__))), autoescape=select_autoescape())
context = {
"version": "0.5.1",
}
Expand Down
9 changes: 9 additions & 0 deletions pipeline/src/instance_patches_template.py.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# this file was auto-generated!

{% for imp in imports %}
{{imp}}
{% endfor %}

{% for class_name, inst_name, prop_name, value in patches %}
{{class_name}}.{{inst_name}}.{{prop_name}} = {{value}}
{% endfor %}
24 changes: 24 additions & 0 deletions pipeline/src/instances_template.py.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# this file was auto-generated!

{% for imp in imports %}
{{imp}}
{% endfor %}

{% for class_name, inst_name, props in phase1_instances %}
{{class_name}}.{{inst_name}} = {{class_name}}(
{% for key, value in props.items() -%}
{% if value is string -%}
{% if value.startswith('http') and key != 'id' -%}
{{key}}=IRI("{{value}}"),
{%- else -%}
{{key}}="{{value}}",
{%- endif %}
{%- else -%}
{{key}}={{value}},
{%- endif %}
{% endfor -%}
)
{% endfor %}
{% for class_name, inst_name, prop_name, value in phase2_patches %}
{{class_name}}.{{inst_name}}.{{prop_name}} = {{value}}
{% endfor %}
16 changes: 2 additions & 14 deletions pipeline/src/module_template.py.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,5 @@ class {{ class_name }}({{ base_class }}):

{{ additional_methods }}

{% for instance_name, instance in instances.items() %}
{{ class_name }}.{{ instance_name }} = {{ class_name }}(
{% for key, value in instance.items() -%}
{% if value is string -%}
{% if value.startswith('http') and key != 'id' -%}
{{key}}=IRI("{{value}}"),
{%- else -%}
{{key}}="{{value}}",
{%- endif %}
{%- else -%}
{{key}}={{value}},
{%- endif %}
{% endfor -%}
){% endfor %}

{{instances_trigger}}
26 changes: 26 additions & 0 deletions pipeline/tests/test_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,29 @@ def test_issue0084(om):
"name": "test",
"order": 0,
}


@pytest.mark.parametrize("om", [openminds.v5, openminds.latest])
def test_issue0094(om, tmp_path):
    """Regression test for issue #94.

    https://github.com/openMetadataInitiative/openMINDS_Python/issues/94

    Accessibility library instances used to store payment_models as plain
    dicts instead of PaymentModelType objects, which caused a KeyError in
    Collection.load().
    """
    payment_model_cls = om.controlled_terms.PaymentModelType
    accessibility = om.core.Accessibility.direct_virtual_open_access

    # The property must hold typed objects, not raw dicts.
    first_payment_model = accessibility.payment_models[0]
    assert not isinstance(first_payment_model, dict)
    assert isinstance(first_payment_model, payment_model_cls)

    # Round-trip through disk: saving and reloading must not raise KeyError.
    storage_dir = str(tmp_path)
    written = Collection()
    written.add(accessibility)
    written.save(storage_dir, individual_files=True, group_by_schema=True)

    # The version passed to load() is the second dotted component of the
    # parametrized module's name (presumably "v5" / "latest" - confirm).
    reloaded = Collection()
    reloaded.load(storage_dir, version=om.__name__.split(".")[1])

    restored = next(entry for entry in reloaded if isinstance(entry, om.core.Accessibility))
    assert restored.id == accessibility.id
59 changes: 36 additions & 23 deletions pipeline/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _version_module(self):
def _target_file_without_extension(self) -> str:
return os.path.join(self._version_module, "/".join(self.relative_path_without_extension))

def translate(self, embedded=None, class_to_module_map=None):
def translate(self, embedded=None, class_to_module_map=None, class_full_modules=None):
def get_type(property):
type_map = {
"string": "str",
Expand Down Expand Up @@ -164,23 +164,31 @@ def get_type(property):
else:
base_class = "LinkedMetadata"

def filter_value(value):
has_instances = bool(self.instances.get(openminds_type))
module_name = self.relative_path_without_extension[-1]

def filter_value_strings(value):
"""Normalize strings only (recursing into lists); reference conversion happens later, in *_instances.py generation."""

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest removing the PythonRef reference.

if isinstance(value, str):
return value.replace('"', "'").replace("\n", " ")
if isinstance(value, list):
return [filter_value_strings(item) for item in value]
return value

def filter_instance(instance):
filtered_instance = {
k: filter_value(v) for k, v in instance.items() if k[0] != "@" and k[:4] != "http" and v is not None
filtered = {
k: filter_value_strings(v)
for k, v in instance.items()
if k[0] != "@" and k[:4] != "http" and v is not None
}
filtered_instance["id"] = instance["@id"]
return filtered_instance
filtered["id"] = instance["@id"]
return filtered

instances = {
generate_python_name(instance["@id"].split("/")[-1]): filter_instance(instance)
for instance in self.instances.get(openminds_type, [])
instances_raw = {
generate_python_name(inst["@id"].split("/")[-1]): filter_instance(inst)
for inst in self.instances.get(openminds_type, [])
}
instances = {name: instances[name] for name in sorted(instances)} # sort by key
instances_raw = {name: instances_raw[name] for name in sorted(instances_raw)}

properties = []
for iri, property in self._schema_payload["properties"].items():
Expand Down Expand Up @@ -211,9 +219,10 @@ def filter_instance(instance):
}
)
# unused in property: "nameForReverseLink"
for instance in instances.values():
for instance in instances_raw.values():
if property["name"] in instance:
instance[pythonic_name] = instance.pop(property["name"])

self.context = {
"docstring": self._schema_payload.get("description", "<description not available>"),
"base_class": base_class,
Expand All @@ -223,13 +232,13 @@ def filter_instance(instance):
"schema_version": self.version,
"context_vocab": self.context_vocab,
"properties": sorted(properties, key=lambda p: p["name"].lower()),
"additional_methods": "",
"instances": instances,
"additional_methods": self.additional_methods["by_name"] if has_instances else "",
"instances_trigger": (
f"from . import {module_name}_instances as _ # noqa: F401" if has_instances else ""
),
"instances_raw": instances_raw,
}

if len(instances) > 0:
self.context["additional_methods"] = self.additional_methods["by_name"]

import_map = {
"date": "from datetime import date",
"datetime": "from datetime import datetime",
Expand All @@ -239,8 +248,6 @@ def filter_instance(instance):
"Real": "from numbers import Real",
}
extra_imports = set()
if len(instances) > 0:
extra_imports.add(import_map["IRI"])
for property in self.context["properties"]:
if isinstance(property["type"], list):
for t in property["type"]:
Expand All @@ -251,14 +258,14 @@ def filter_instance(instance):
imp = import_map.get(property["type"], None)
if imp:
extra_imports.add(imp)
if extra_imports:
self.context["preamble"] = "\n".join(sorted(extra_imports))
if extra_imports:
self.context["preamble"] = "\n".join(sorted(extra_imports))

def build(self, embedded=None, class_to_module_map=None):
def build(self, embedded=None, class_to_module_map=None, class_full_modules=None):
target_file_path = os.path.join("target", "openminds", f"{self._target_file_without_extension()}.py")
os.makedirs(os.path.dirname(target_file_path), exist_ok=True)

self.translate(embedded=embedded, class_to_module_map=class_to_module_map)
self.translate(embedded=embedded, class_to_module_map=class_to_module_map, class_full_modules=class_full_modules)

with open(target_file_path, "w") as target_file:
contents = self.env.get_template(self.template_name).render(self.context)
Expand All @@ -274,7 +281,7 @@ def get_edges(self):
linked.update(property.get("_linkedTypes", []))
return embedded, linked

def update_class_to_module_map(self, class_to_module_map):
def update_class_to_module_map(self, class_to_module_map, class_full_modules=None):
"""
Updates a dictionary with the class name and its corresponding module based on the schemas.

Expand All @@ -288,6 +295,9 @@ def update_class_to_module_map(self, class_to_module_map):
Args:
class_to_module_map (dict): A dictionary where keys are class names and values
are their corresponding modules.
class_full_modules (dict, optional): If provided, also populated with
class_name → full dotted submodule path
(e.g. "core.miscellaneous.accessibility").

Returns:
dict: The updated dictionary with the class name and module mapping.
Expand All @@ -301,4 +311,7 @@ def update_class_to_module_map(self, class_to_module_map):

class_to_module_map[class_name] = module

if class_full_modules is not None:
class_full_modules[class_name] = ".".join(self.relative_path_without_extension)

return class_to_module_map
Loading
Loading