Coverage for apis_core/generic/importers.py: 0%

100 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-06-25 10:00 +0000

1import json 

2import logging 

3import urllib 

4from functools import cache 

5 

6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri 

7from django.contrib.contenttypes.models import ContentType 

8from django.core.exceptions import ImproperlyConfigured 

9from django.db.utils import IntegrityError 

10 

11from apis_core.apis_metainfo.models import Uri 

12from apis_core.apis_metainfo.utils import create_object_from_uri 

13from apis_core.utils.rdf import get_something_from_uri 

14 

15logger = logging.getLogger(__name__) 

16 

17 

class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that fails, it tries to parse it using JSON and
    then extracts the fields whose keys match the model field names.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # The model class instances are created for; set in `__init__`.
    model = None
    # The cleaned URI the data is fetched from; set in `__init__`.
    import_uri = None

    def __init__(self, uri, model):
        """Store the target `model` and the cleaned version of `uri`."""
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """Return the (cleaned) URI this importer fetches its data from."""
        return self.import_uri

    def clean_uri(self, uri):
        """Return `uri` after passing it through `get_normalized_uri`."""
        return get_normalized_uri(uri)

    def request(self, uri):
        """
        Fetch and parse the resource behind `uri`.

        The RDF parser is tried first. If it raises, the resource is
        downloaded and parsed as JSON instead. If that fails as well,
        an empty dict is returned. Both failures are only logged at
        debug level.

        Results (including the empty fallback) are remembered per
        importer instance and per URI, so repeated calls do not hit
        the network again.
        """
        # `import urllib` alone does not load the `request` submodule, so
        # import it explicitly; otherwise the JSON fallback could fail with
        # an AttributeError that the broad `except` below would hide.
        import urllib.request

        # The cache lives on the instance instead of a `functools.cache`
        # decorated method, which would keep every importer alive forever.
        responses = vars(self).setdefault("_request_cache", {})
        if uri in responses:
            return responses[uri]

        # we first try to use the RDF parser
        try:
            data = get_something_from_uri(
                uri,
                [self.model],
            )
        except Exception as rdf_error:
            logger.debug(rdf_error)
            # if everything else fails, try parsing JSON
            # if even that does not help, return an empty dict
            try:
                # the context manager makes sure the response is closed
                with urllib.request.urlopen(uri) as response:
                    data = json.loads(response.read())
            except Exception as json_error:
                logger.debug(json_error)
                data = {}

        responses[uri] = data
        return data

    def mangle_data(self, data):
        """Hook for subclasses to post-process fetched data; returns it as is."""
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if no data is left.
        """
        fetched = self.mangle_data(self.request(self.import_uri))
        data = fetched
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: fetched[key] for key in fetched if key in modelfields}
        if not data:
            # Report what was actually fetched; the filtered dict is always
            # empty at this point and would not help with debugging.
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {fetched}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy imported values onto an existing `instance` and save it.

        `fields` is either the string "__all__" (use every imported field)
        or an iterable of field names. A field is only set if the instance
        has an attribute of that name and the imported data contains a
        non-empty value for it; the first element of that value is used.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = data.keys()
        for field in fields:
            if not hasattr(instance, field):
                continue
            values = data.get(field)
            # Skip missing and empty values (same rule as `create_instance`)
            # instead of failing with an IndexError on an empty list.
            if not values:
                continue
            setattr(instance, field, values[0])
        instance.save()

    def create_instance(self):
        """
        Return a model instance for the import URI, creating it if needed.

        The fetched data is used unfiltered. Its "same_as" URIs are
        normalized and looked up in `Uri`; if they point to an existing
        object, that object is reused, otherwise a new instance is created
        from the first value of every model field present in the data.
        A `Uri` entry is then ensured for every "same_as" URI.

        For every entry in the data's "relations" mapping, the related
        objects are created from their URIs and linked to the instance.
        Failures for individual related URIs are logged and skipped.

        Raises `IntegrityError` if the "same_as" URIs resolve to more
        than one distinct object.
        """
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = [get_normalized_uri(uri) for uri in data.get("same_as", [])]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    f"This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            # only the first value of every non-empty model field is used
            attributes = {
                field.name: data[field.name][0]
                for field in self.model._meta.fields
                if data.get(field.name, False)
            }
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        for relation, details in data.get("relations", {}).items():
            # relation keys have the form "<app_label>.<model>"
            rel_app_label, rel_model = relation.split(".")
            relation_model = ContentType.objects.get_by_natural_key(
                app_label=rel_app_label, model=rel_model
            ).model_class()

            # NOTE(review): assumes every relation declares either "obj" or
            # "subj"; if both are missing the `split` below fails on None.
            reld = details.get("obj", None) or details.get("subj", None)
            reld_app_label, reld_model = reld.split(".")
            related_content_type = ContentType.objects.get_by_natural_key(
                app_label=reld_app_label, model=reld_model
            )
            related_model = related_content_type.model_class()

            # With "obj" given, the imported instance is the subject of the
            # relation; otherwise it is the object.
            instance_is_subject = bool(details.get("obj"))

            for related_uri in details["curies"]:
                try:
                    related_instance = create_object_from_uri(
                        uri=related_uri, model=related_model
                    )
                    if instance_is_subject:
                        subj_object_id = instance.pk
                        subj_content_type = content_type
                        obj_object_id = related_instance.pk
                        obj_content_type = related_content_type
                    else:
                        obj_object_id = instance.pk
                        obj_content_type = content_type
                        subj_object_id = related_instance.pk
                        subj_content_type = related_content_type
                    rel, _ = relation_model.objects.get_or_create(
                        subj_object_id=subj_object_id,
                        subj_content_type=subj_content_type,
                        obj_object_id=obj_object_id,
                        obj_content_type=obj_content_type,
                    )
                    logger.debug(
                        "Created relation %s between %s and %s",
                        relation_model.name(),
                        rel.subj,
                        rel.obj,
                    )
                except Exception as e:
                    # best effort: one broken relation must not abort the import
                    logger.error(
                        "Could not create relation to %s due to %s", related_uri, e
                    )
        return instance