Coverage for apis_core/utils/rdf.py: 98%

48 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-12-20 09:24 +0000

1# SPDX-FileCopyrightText: 2023 Birger Schacht 

2# SPDX-License-Identifier: MIT 

3 

4import importlib 

5import logging 

6import re 

7from typing import Tuple 

8 

9from rdflib import Graph 

10 

11from apis_core.utils.normalize import clean_uri 

12from apis_core.utils.settings import dict_from_toml_directory 

13 

14logger = logging.getLogger(__name__) 

15 

16 

def definition_matches_model(definition: dict, model: object) -> bool:
    """
    Check whether `model` is a subclass (or an instance of a subclass) of the
    class named by the `superclass` entry of `definition`.

    The `superclass` value is split at its last dot into a module path and a
    class name; the module is imported and the class is looked up on it.

    Returns `False` when `definition` has no truthy `superclass` entry, when
    `model` is falsy, when there is no subclass relationship, or when
    resolving the dotted path fails. Failures are logged, never raised.
    """
    superclass = definition.get("superclass", False)
    if not superclass or not model:
        return False
    try:
        module_path, cls_name = superclass.rsplit(".", 1)
        parent = getattr(importlib.import_module(module_path), cls_name)
        # Instance case: compare the instance's class against the parent.
        if issubclass(type(model), parent):
            return True
        # Class case: `model` itself may be a class. Only pass it to
        # issubclass() when it really is one -- handing over an instance
        # raises TypeError, which would otherwise be logged below as an
        # error for every ordinary, non-matching instance.
        if isinstance(model, type) and issubclass(model, parent):
            return True
    except Exception as e:
        logger.error("superclass %s led to: %s", superclass, e)
    return False

28 

29 

def definition_matches_uri(definition: str, uri: str) -> bool:
    """
    Tell whether `uri` is acceptable for `definition`.

    Although annotated as `str`, `definition` is accessed with `.get`, i.e.
    it is used as a mapping. If it carries a truthy `regex` entry, the whole
    `uri` has to match that regular expression; without such an entry every
    URI is accepted.
    """
    regex = definition.get("regex", False)
    if not regex:
        # No pattern configured: nothing to restrict.
        return True
    logger.info("found regex %s", regex)
    return re.fullmatch(regex, uri) is not None

37 

38 

def get_definition_and_attributes_from_uri(
    uri: str, model: object
) -> Tuple[dict, dict]:
    """
    Find the first `rdfimport` definition that fits `model` and `uri`, and
    use it to extract attributes from the RDF data behind `uri`.

    The URI is cleaned with `clean_uri` and parsed into an rdflib `Graph`.
    The definitions returned by `dict_from_toml_directory("rdfimport")` are
    then checked in order with `definition_matches_model` and
    `definition_matches_uri`; the first one passing both checks is used.

    Every entry in the definition's `attributes` list that has a truthy
    `sparql` value is run as a query against the graph. Each variable of
    each result binding is stored, stringified, under the variable's name;
    a later binding for the same variable overwrites an earlier one.

    Returns a tuple of the matched definition and the extracted attributes.
    The definition is returned as well so a caller can pick the model to
    create from what is declared in it. To ease debugging, the key under
    which the definition was found is stored in it as `filename`.

    Raises `AttributeError` if no definition matches.
    """
    uri = clean_uri(uri)
    graph = Graph()
    graph.parse(uri)

    matching_definition = None
    for filename, candidate in dict_from_toml_directory("rdfimport").items():
        if not definition_matches_model(candidate, model):
            continue
        if not definition_matches_uri(candidate, uri):
            continue
        matching_definition = candidate
        matching_definition["filename"] = str(filename)
        break

    if not matching_definition:
        raise AttributeError(f"No matching definition found for {uri}")

    attributes = matching_definition.get("attributes", [])
    sparql_attributes = [entry for entry in attributes if entry.get("sparql")]

    model_attributes = {}
    for attribute in sparql_attributes:
        result = graph.query(attribute["sparql"])
        for binding in result.bindings:
            # A binding looks like
            # {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')}
            for variable, term in binding.items():
                model_attributes[str(variable)] = str(term)
    return matching_definition, model_attributes