Coverage for apis_core/utils/rdf.py: 98%
48 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-22 07:51 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-22 07:51 +0000
1# SPDX-FileCopyrightText: 2023 Birger Schacht
2# SPDX-License-Identifier: MIT
4import importlib
5import logging
6import re
7from typing import Tuple
9from rdflib import Graph
11from apis_core.utils.normalize import clean_uri
12from apis_core.utils.settings import dict_from_toml_directory
14logger = logging.getLogger(__name__)
def definition_matches_model(definition: dict, model: object) -> bool:
    """
    Check whether `model` fits the `superclass` named in `definition`.

    `definition` is a parsed definition mapping (it is read with `.get`).
    Its `superclass` value is a dotted path (`package.module.ClassName`)
    that is split at the last dot and imported dynamically. `model` may be
    either a class or an instance.

    Returns `True` if the type of `model` is a subclass of the resolved
    class, or if `model` itself is a class that is a subclass of it.
    Returns `False` if `definition` has no truthy `superclass` entry, if
    `model` is falsy, or if nothing matches. Errors raised while resolving
    or comparing the class are logged and result in `False`; they are never
    propagated to the caller.
    """
    superclass_path = definition.get("superclass", False)
    if not superclass_path or not model:
        return False
    try:
        module_name, class_name = superclass_path.rsplit(".", 1)
        parent = getattr(importlib.import_module(module_name), class_name)
        # An instance matches through its type.
        if issubclass(type(model), parent):
            return True
        # `issubclass` raises TypeError when its first argument is not a
        # class, so `model` itself is only compared when it is a class.
        # Without this guard every non-matching instance would end up in
        # the error log below.
        if isinstance(model, type) and issubclass(model, parent):
            return True
    except Exception as e:
        # Deliberately broad: a broken definition must not abort the lookup,
        # it is reported and treated as "no match".
        logger.error("superclass %s led to: %s", superclass_path, e)
    return False
def definition_matches_uri(definition: dict, uri: str) -> bool:
    """
    Check whether `uri` satisfies the optional `regex` of `definition`.

    `definition` is a parsed definition mapping (it is read with `.get`).
    If it has no truthy `regex` entry, every URI matches and `True` is
    returned. Otherwise the regex has to match the *whole* `uri`
    (`fullmatch`, not a prefix or substring search).
    """
    regex = definition.get("regex", False)
    if not regex:
        # No restriction configured: the definition applies to any URI.
        return True
    logger.info("found regex %s", regex)
    return re.fullmatch(regex, uri) is not None
def get_definition_and_attributes_from_uri(
    uri: str, model: object
) -> Tuple[dict, dict]:
    """
    Parse the RDF data behind `uri` and extract attributes from it using
    the first matching `rdfimport` definition.

    The definitions are the parsed `.toml` files from the `rdfimport` app
    directories. A definition is a candidate if its `superclass` fits
    `model` and, when it defines a `regex`, that regex matches the cleaned
    `uri`. The first candidate wins. Every entry in its `attributes` list
    that has a `sparql` query is run against the parsed graph, and each
    bound variable is stored as `str(variable) -> str(value)`; later
    bindings of the same variable overwrite earlier ones.

    Returns a tuple of the matched definition and the extracted attributes.
    The definition is returned as well so that a model can be declared *in*
    the file and callers iterating over many URIs can pick the model to
    create from it. The definition additionally gets a `filename` entry
    holding the key it was loaded under, to make debugging a bit easier.

    Raises `AttributeError` if no definition matches.
    """
    uri = clean_uri(uri)
    graph = Graph()
    graph.parse(uri)

    configs = dict_from_toml_directory("rdfimport")
    first_match = next(
        (
            (name, candidate)
            for name, candidate in configs.items()
            if definition_matches_model(candidate, model)
            and definition_matches_uri(candidate, uri)
        ),
        None,
    )
    if first_match is None:
        raise AttributeError(f"No matching definition found for {uri}")

    name, matching_definition = first_match
    matching_definition["filename"] = str(name)

    # Only attribute entries that actually carry a SPARQL query are used.
    queries = [
        entry["sparql"]
        for entry in matching_definition.get("attributes", [])
        if entry.get("sparql")
    ]

    model_attributes = {}
    for query in queries:
        for binding in graph.query(query).bindings:
            # A binding maps variables to terms, e.g.
            # {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')}
            for variable, term in binding.items():
                model_attributes[str(variable)] = str(term)
    return matching_definition, model_attributes