Coverage for apis_core/utils/rdf.py: 98%
49 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-02-19 16:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-02-19 16:54 +0000
1# SPDX-FileCopyrightText: 2023 Birger Schacht
2# SPDX-License-Identifier: MIT
4import importlib
5import logging
6import re
7from collections import defaultdict
8from typing import Tuple
10from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
11from rdflib import Graph
13from apis_core.utils.settings import dict_from_toml_directory
15logger = logging.getLogger(__name__)
def definition_matches_model(definition: dict, model: object) -> bool:
    """
    Check whether `model` derives from the class named in `definition`.

    :param definition: parsed import definition; its optional `superclass`
        value is a dotted path of the form `package.module.ClassName`.
    :param model: a class or an instance of a class; falsy values never match.
    :return: `True` if the `superclass` could be imported and either `model`
        itself (when it is a class) or the type of `model` is a subclass of
        it, `False` otherwise. Errors while resolving the dotted path are
        logged and treated as "no match".
    """
    superclass = definition.get("superclass", False)
    if not (superclass and model):
        return False
    try:
        module_path, class_name = superclass.rsplit(".", 1)
        parent = getattr(importlib.import_module(module_path), class_name)
        if issubclass(type(model), parent):
            return True
        # `issubclass` raises TypeError when its first argument is not a
        # class, so `model` itself is only checked when it actually is one.
        # Without this guard every non-matching instance would end up in the
        # error log below.
        if isinstance(model, type) and issubclass(model, parent):
            return True
    except Exception as e:
        # Best effort: a broken `superclass` entry in one definition must not
        # abort the lookup, it simply does not match.
        logger.error("superclass %s led to: %s", superclass, e)
    return False
def definition_matches_uri(definition: dict, uri: str) -> bool:
    """
    Check `uri` against the optional `regex` key of `definition`.

    :param definition: parsed import definition.
    :param uri: the URI to test.
    :return: `False` only if the definition has a non-empty `regex` that does
        not match the whole of `uri`; a definition without a `regex` matches
        every URI.
    """
    regex = definition.get("regex", False)
    if not regex:
        return True
    logger.info("found regex %s", regex)
    # The pattern has to cover the complete URI, not just a prefix of it.
    return re.fullmatch(regex, uri) is not None
def get_definition_and_attributes_from_uri(
    uri: str, model: object
) -> Tuple[dict, dict]:
    """
    This function looks for `.toml` files in the `rdfimport` app directories
    and loads all the files it can parse. For every file that contains a
    `superclass` key it checks if it is a superclass of `model`; if the file
    also contains a `regex` key, the normalized URI has to match it
    completely.
    It uses the first file that matches to extract attributes from the RDF
    endpoint and then returns both the parsed file contents and the extracted
    attributes.
    The reason we are also returning the parsed file contents is, that you then
    can define a model *in* the file and then use this function to iterate over
    a list of URIs and you can use the matched definition to choose which model
    to create.
    The dict containing the parsed file contents also contains the filename, to
    make debugging a bit easier.

    :param uri: the URI to import from; it is normalized before use.
    :param model: class or instance the definition's `superclass` is checked
        against.
    :return: a tuple of the matching definition (with an added `filename`
        key) and a dict mapping every SPARQL result variable name to the list
        of its values, all converted to strings.
    :raises AttributeError: if no definition matches `model` and `uri`.
    """
    uri = get_normalized_uri(uri)

    configs = dict_from_toml_directory("rdfimport")
    matching_definition = None
    for filename, definition in configs.items():
        if definition_matches_model(definition, model) and definition_matches_uri(
            definition, uri
        ):
            matching_definition = definition
            matching_definition["filename"] = str(filename)
            break
    if matching_definition is None:
        raise AttributeError(f"No matching definition found for {uri}")

    # Only fetch and parse the RDF data once we know there is a definition
    # that can make use of it; otherwise the request would be wasted and a
    # fetch error could hide the "no matching definition" error above.
    graph = Graph()
    graph.parse(uri)

    model_attributes = defaultdict(list)
    for attribute in matching_definition.get("attributes", []):
        # Attributes without a SPARQL query are skipped.
        query = attribute.get("sparql")
        if not query:
            continue
        for binding in graph.query(query).bindings:
            # {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')}
            for variable, value in binding.items():
                model_attributes[str(variable)].append(str(value))
    return matching_definition, model_attributes