Coverage for apis_core/generic/importers.py: 0%

81 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-10-10 13:36 +0000

1import json 

2import logging 

3import urllib 

4from functools import cache 

5 

6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri 

7from django.contrib.contenttypes.models import ContentType 

8from django.core.exceptions import ImproperlyConfigured 

9from django.db.utils import IntegrityError 

10 

11from apis_core.uris.models import Uri 

12from apis_core.utils.rdf import get_something_from_uri 

13 

14logger = logging.getLogger(__name__) 

15 

16 

class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that fails, it tries to parse it using JSON and
    then extracts the fields whose keys match the model field names.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # The model class instances are created for / imported into.
    model = None
    # The normalized URI the data is fetched from.
    import_uri = None

    def __init__(self, uri, model):
        """Store `model` and the normalized form of `uri`."""
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """Return the (normalized) URI this importer fetches from."""
        return self.import_uri

    def clean_uri(self, uri):
        """Normalize `uri` using `get_normalized_uri`."""
        return get_normalized_uri(uri)

    def request(self, uri):
        """
        Fetch `uri` and return the parsed data.

        The RDF parser is tried first; if it yields nothing (or raises),
        the resource is fetched again and parsed as JSON. Failures of
        either step are only logged at debug level. The result (an empty
        dict if both steps failed) is memoized per importer instance and
        per URI, so repeated calls do not hit the network again.
        """
        # Function-scope import: a bare `import urllib` does not guarantee
        # that the `urllib.request` submodule is loaded.
        from urllib.request import urlopen

        # The memo lives on the instance instead of in a module-global
        # `functools.cache`, which would keep every importer (and its
        # payload) alive for the lifetime of the process.
        memo = vars(self).setdefault("_request_cache", {})
        if uri in memo:
            return memo[uri]

        data = None
        # We first try to use the RDF parser
        try:
            data = get_something_from_uri(
                uri,
                [self.model],
            )
        except Exception as e:
            logger.debug(e)
        # If there is no data yet, try parsing JSON
        if not data:
            try:
                # `with` makes sure the HTTP response is closed again
                with urlopen(uri) as response:
                    data = json.loads(response.read())
            except Exception as e:
                logger.debug(e)
        # Remember the fetched data or an empty dict if there is none
        memo[uri] = data or {}
        return memo[uri]

    def mangle_data(self, data):
        """Hook for subclasses to post-process fetched data; returns it unchanged."""
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if no data is left.
        """
        fetched = self.mangle_data(self.request(self.import_uri))
        data = fetched
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: fetched[key] for key in fetched if key in modelfields}
        if not data:
            # Report what was actually fetched; the filtered dict is
            # always empty at this point and would tell the reader nothing.
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {fetched}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy fetched values onto `instance` and save it.

        `fields` is either "__all__" (every fetched field) or an iterable
        of field names. A field is only set if `instance` has an attribute
        of that name and a non-empty value was fetched for it; the first
        element of the fetched value is used.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = list(data)
        for field in fields:
            values = data.get(field)
            # Skip empty values (same rule as `create_instance`) instead of
            # failing with an IndexError on `values[0]`.
            if hasattr(instance, field) and values:
                setattr(instance, field, values[0])
        instance.save()

    def create_instance(self):
        """
        Return the model instance that belongs to the import URI.

        If one of the fetched `same_as` URIs is already stored as a `Uri`,
        the object it points to is reused; otherwise a new instance of
        `self.model` is created from the first element of every non-empty
        fetched value whose key matches a model field. All `same_as` URIs
        are then attached to the instance. If the data contains
        `relations`, they are stored in `create_relations_to_uris` on the
        instance and the instance is saved.

        Raises `IntegrityError` if the `same_as` URIs point to more than
        one existing object.
        """
        # NOTE(review): the block nesting below was reconstructed from a
        # listing that lost its indentation - confirm that the URI
        # registration is meant to run for found *and* new instances.
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = [get_normalized_uri(uri) for uri in data.get("same_as", [])]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    "This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            attributes = {}
            for field in self.model._meta.fields:
                if data.get(field.name, False):
                    attributes[field.name] = data[field.name][0]
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        if "relations" in data:
            instance.create_relations_to_uris = data["relations"]
            instance.save()
        return instance