Coverage for apis_core/generic/importers.py: 0%

102 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-09-03 06:15 +0000

1import json 

2import logging 

3import urllib 

4from functools import cache 

5 

6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri 

7from django.contrib.contenttypes.models import ContentType 

8from django.core.exceptions import ImproperlyConfigured 

9from django.db.utils import IntegrityError 

10 

11from apis_core.uris.models import Uri 

12from apis_core.uris.utils import create_object_from_uri 

13from apis_core.utils.rdf import get_something_from_uri 

14 

15logger = logging.getLogger(__name__) 

16 

17 

class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that fails, it tries to parse it using JSON and
    then extracts the fields whose keys match the model field names.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # The model class instances are created for; set per instance in __init__.
    model = None
    # The cleaned URI the data is fetched from; set per instance in __init__.
    import_uri = None

    def __init__(self, uri, model):
        """Remember `model` and the cleaned form of `uri`."""
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """The cleaned URI this importer fetches its data from."""
        return self.import_uri

    def clean_uri(self, uri):
        """Return `uri` normalized by `get_normalized_uri`."""
        return get_normalized_uri(uri)

    def request(self, uri):
        """
        Fetch the data for `uri` and return it as a dict.

        The result (including the empty dict of a failed fetch) is
        memoized per importer instance and URI, so repeated calls do not
        hit the network again. The memo is stored on the instance instead
        of in a `functools.cache` wrapper on the class, because such a
        wrapper keeps a reference to every importer it was ever called on.
        """
        # Created lazily so it also works for instances built without __init__.
        memo = self.__dict__.setdefault("_request_cache", {})
        if uri not in memo:
            memo[uri] = self._fetch(uri)
        return memo[uri]

    def _fetch(self, uri):
        """Fetch `uri` without caching: RDF parser first, JSON as fallback."""
        # `import urllib` alone does not guarantee that the `request`
        # submodule is loaded, so import it explicitly.
        import urllib.request

        data = None
        # We first try to use the RDF parser
        try:
            data = get_something_from_uri(
                uri,
                [self.model],
            )
        except Exception as e:
            # Deliberately best effort: fall through to the JSON attempt.
            logger.debug(e)
        # If there is no data yet, try parsing JSON
        if not data:
            try:
                # The context manager closes the HTTP response again.
                with urllib.request.urlopen(uri) as response:
                    data = json.loads(response.read())
            except Exception as e:
                logger.debug(e)
        # Return the fetched data or an empty dict if there is none
        return data or {}

    @staticmethod
    def _first_value(value):
        """
        Return the single value to store for a fetched field.

        Lists and tuples contribute their first element. Any other value
        (e.g. a scalar from a plain JSON document) is returned unchanged;
        indexing it would cut a string down to its first character or
        raise for numbers.
        """
        if isinstance(value, (list, tuple)):
            return value[0]
        return value

    def mangle_data(self, data):
        """Hook for subclasses to reshape the fetched data; returns it as is."""
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if no data is left.
        """
        data = self.request(self.import_uri)
        data = self.mangle_data(data)
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: data[key] for key in data if key in modelfields}
        if not data:
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {data}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy fetched values onto `instance` and save it.

        `fields` is an iterable of field names, or "__all__" to use every
        fetched field. Names that `instance` has no attribute for, or that
        were not fetched, are ignored.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = data.keys()
        for field in fields:
            if not hasattr(instance, field) or field not in data:
                continue
            value = data[field]
            # An empty list carries no value; `create_instance` skips
            # those as well.
            if isinstance(value, (list, tuple)) and not value:
                continue
            setattr(instance, field, self._first_value(value))
        instance.save()

    def create_instance(self):
        """
        Return a model instance for the import URI, creating it if needed.

        If one of the fetched `same_as` URIs is already stored as a `Uri`,
        the object it points to is reused; otherwise a new instance is
        created from the fetched values of the model fields. Afterwards all
        `same_as` URIs are attached to the instance and the relations listed
        under the `relations` key are created. A relation that fails is
        logged and skipped.

        Raises `IntegrityError` if the `same_as` URIs point to more than
        one object.
        """
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = data.get("same_as", [])
        same_as = [get_normalized_uri(uri) for uri in same_as]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    "This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            attributes = {}
            for field in self.model._meta.fields:
                value = data.get(field.name, False)
                # Fields without a (truthy) fetched value are left out.
                if value:
                    attributes[field.name] = self._first_value(value)
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        for relation, details in data.get("relations", {}).items():
            # `relation` names the relation model as "<app_label>.<model>".
            rel_app_label, rel_model = relation.split(".")
            relation_model = ContentType.objects.get_by_natural_key(
                app_label=rel_app_label, model=rel_model
            ).model_class()

            # The model of the other end, given under either `obj` or `subj`.
            reld = details.get("obj", None) or details.get("subj", None)
            if not reld:
                # Without it there is nothing to relate to; skip this
                # relation instead of failing on `None.split`.
                logger.error(
                    "Could not create relations %s: neither obj nor subj is set",
                    relation,
                )
                continue
            reld_app_label, reld_model = reld.split(".")
            related_content_type = ContentType.objects.get_by_natural_key(
                app_label=reld_app_label, model=reld_model
            )
            related_model = related_content_type.model_class()

            for related_uri in details["curies"]:
                try:
                    related_instance = create_object_from_uri(
                        uri=related_uri, model=related_model
                    )
                    if details.get("obj"):
                        # The related object is the object of the relation,
                        # so this instance is the subject.
                        subj_object_id = instance.pk
                        subj_content_type = content_type
                        obj_object_id = related_instance.pk
                        obj_content_type = related_content_type
                    else:
                        obj_object_id = instance.pk
                        obj_content_type = content_type
                        subj_object_id = related_instance.pk
                        subj_content_type = related_content_type
                    rel, _ = relation_model.objects.get_or_create(
                        subj_object_id=subj_object_id,
                        subj_content_type=subj_content_type,
                        obj_object_id=obj_object_id,
                        obj_content_type=obj_content_type,
                    )
                    logger.debug(
                        "Created relation %s between %s and %s",
                        relation_model.name(),
                        rel.subj,
                        rel.obj,
                    )
                except Exception as e:
                    # Best effort: one failing relation must not abort the import.
                    logger.error(
                        "Could not create relation to %s due to %s", related_uri, e
                    )
        return instance