Coverage for apis_core/utils/rdf.py: 98%

49 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-02-19 16:54 +0000

1# SPDX-FileCopyrightText: 2023 Birger Schacht 

2# SPDX-License-Identifier: MIT 

3 

4import importlib 

5import logging 

6import re 

7from collections import defaultdict 

8from typing import Tuple 

9 

10from AcdhArcheAssets.uri_norm_rules import get_normalized_uri 

11from rdflib import Graph 

12 

13from apis_core.utils.settings import dict_from_toml_directory 

14 

15logger = logging.getLogger(__name__) 

16 

17 

def definition_matches_model(definition: dict, model: object) -> bool:
    """
    Check whether `model` matches the `superclass` named in `definition`.

    The `superclass` entry of `definition` is expected to be a dotted path
    (`package.module.ClassName`). The class is imported dynamically and the
    function returns `True` if `model` is an instance of a subclass of it,
    or if `model` itself is a class that is a subclass of it.

    Args:
        definition: mapping that may contain a `superclass` key.
        model: a class or an instance to compare against the superclass.

    Returns:
        `True` on a match. `False` if there is no `superclass` entry, if
        `model` is falsy, if nothing matches, or if resolving the
        superclass failed (the failure is logged, not raised).
    """
    superclass = definition.get("superclass", False)
    if not superclass or not model:
        return False
    try:
        module_name, class_name = superclass.rsplit(".", 1)
        parent = getattr(importlib.import_module(module_name), class_name)
        if issubclass(type(model), parent):
            return True
        # Only call issubclass() on `model` itself when it is a class;
        # passing an instance raises TypeError, which would be logged as
        # an error below although it is just an ordinary non-match.
        if isinstance(model, type) and issubclass(model, parent):
            return True
    except Exception as e:
        # Best effort: a broken definition must not abort the caller.
        logger.error("superclass %s led to: %s", superclass, e)
    return False

29 

30 

def definition_matches_uri(definition: str, uri: str) -> bool:
    """
    Check whether `uri` satisfies the optional `regex` of `definition`.

    `definition` is accessed through `.get`, i.e. it is used as a mapping.
    If it has no truthy `regex` entry, every URI is accepted. Otherwise the
    whole `uri` has to match the regular expression (`fullmatch`).
    """
    regex = definition.get("regex", False)
    if not regex:
        # No restriction configured, so any URI matches.
        return True
    logger.info("found regex %s", regex)
    return re.compile(regex).fullmatch(uri) is not None

38 

39 

def get_definition_and_attributes_from_uri(
    uri: str, model: object
) -> Tuple[dict, dict]:
    """
    Find the import definition matching `uri` and `model` and use it to
    extract attributes from the RDF data behind `uri`.

    The URI is normalized and parsed into an RDF graph. The definitions are
    loaded from the `.toml` files of the `rdfimport` app directories. The
    first definition accepted by both `definition_matches_model` and
    `definition_matches_uri` is used. Every entry of its `attributes` list
    that has a `sparql` value is run as a query against the graph, and the
    results are collected per variable name as lists of strings.

    The matched definition is returned alongside the attributes so that a
    definition can itself name a model: callers iterating over a list of
    URIs can then use the matched definition to decide which model to
    create. The key under which the definition was loaded is stored in its
    `filename` entry to make debugging easier.

    Raises:
        AttributeError: if no definition matches.
    """
    uri = get_normalized_uri(uri)
    graph = Graph()
    graph.parse(uri)

    matching_definition = None
    for filename, candidate in dict_from_toml_directory("rdfimport").items():
        if not definition_matches_model(candidate, model):
            continue
        if not definition_matches_uri(candidate, uri):
            continue
        candidate["filename"] = str(filename)
        matching_definition = candidate
        break

    if not matching_definition:
        raise AttributeError(f"No matching definition found for {uri}")

    # Collect the queries up front so every attribute entry is inspected
    # before the first query runs.
    queries = [
        attribute["sparql"]
        for attribute in matching_definition.get("attributes", [])
        if attribute.get("sparql")
    ]

    model_attributes = defaultdict(list)
    for query in queries:
        for binding in graph.query(query).bindings:
            # {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')}
            for variable, value in binding.items():
                model_attributes[str(variable)].append(str(value))
    return matching_definition, model_attributes