Coverage for apis_core/generic/importers.py: 0%
100 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-25 10:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-06-25 10:00 +0000
1import json
2import logging
3import urllib
4from functools import cache
6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
7from django.contrib.contenttypes.models import ContentType
8from django.core.exceptions import ImproperlyConfigured
9from django.db.utils import IntegrityError
11from apis_core.apis_metainfo.models import Uri
12from apis_core.apis_metainfo.utils import create_object_from_uri
13from apis_core.utils.rdf import get_something_from_uri
15logger = logging.getLogger(__name__)
class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that fails, it tries to parse it using JSON and
    then extracts the fields whose keys match the model field names.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # The model class whose fields are matched against the fetched data
    # and which is instantiated by `create_instance`.
    model = None
    # The URI the data is fetched from, as returned by `clean_uri`.
    import_uri = None

    def __init__(self, uri, model):
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """
        Return the URI this importer fetches its data from.
        """
        return self.import_uri

    def clean_uri(self, uri):
        """
        Return `uri` passed through `get_normalized_uri`.
        """
        return get_normalized_uri(uri)

    # NOTE(review): `cache` on a method keys on `self`, so every importer
    # instance stays referenced by the cache, and an empty result caused by
    # a failed request is cached as well. Kept as is so that `cache_clear`
    # and `cache_info` remain available to callers.
    @cache
    def request(self, uri):
        """
        Fetch `uri` and return the parsed data.

        The RDF parser is tried first. If it raises, the resource is
        fetched with `urlopen` and parsed as JSON. If that raises as
        well, an empty dict is returned. Both failures are only logged
        at debug level.
        """
        # A bare `import urllib` does not guarantee that the `request`
        # submodule is loaded, so import what we need explicitly.
        from urllib.request import urlopen

        # we first try to use the RDF parser
        try:
            return get_something_from_uri(uri, [self.model])
        except Exception as e:
            logger.debug(e)
        # if everything else fails, try parsing JSON
        # if even that does not help, return an empty dict
        try:
            # the context manager closes the response once it has been read
            with urlopen(uri) as response:
                return json.loads(response.read())
        except Exception as e:
            logger.debug(e)
        return {}

    def mangle_data(self, data):
        """
        Hook for post-processing the fetched data.

        The default implementation returns `data` unchanged.
        """
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if the resulting data is empty.
        """
        fetched = self.mangle_data(self.request(self.import_uri))
        data = fetched
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: fetched[key] for key in fetched if key in modelfields}
        if not data:
            # Report the data as it was before filtering; the filtered
            # dict is always empty at this point and tells the reader nothing.
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {fetched}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy fetched values onto `instance` and save it.

        `fields` is either the string "__all__" (use every key of the
        fetched data) or an iterable of field names. For every listed
        field that exists as an attribute on `instance` and has a
        non-empty value in the data, the first element of that value is
        assigned. Fields without a value are skipped, matching what
        `create_instance` does.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = data.keys()
        for field in fields:
            values = data.get(field)
            # skip missing and empty values instead of failing on `[0]`
            if values and hasattr(instance, field):
                setattr(instance, field, values[0])
        instance.save()

    def create_instance(self):
        """
        Return a model instance for the import URI, creating it if needed.

        The data is fetched without dropping unknown fields. If any of
        the normalized URIs listed under the `same_as` key is already
        stored as a `Uri`, the object it points to is reused; otherwise a
        new instance of `self.model` is created from the first value of
        every model field present in the data. All `same_as` URIs are
        then attached to the instance.

        Entries under the `relations` key are keyed by the natural key
        ("app_label.model") of the relation model. Each entry names the
        related model under either `obj` or `subj` and lists the URIs of
        the related objects under `curies`. Failures while creating a
        single related object or relation are logged and skipped.

        Raises `IntegrityError` if the `same_as` URIs point to more than
        one existing object.
        """
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = data.get("same_as", [])
        same_as = [get_normalized_uri(uri) for uri in same_as]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    f"This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            attributes = {}
            for field in self.model._meta.fields:
                if data.get(field.name, False):
                    attributes[field.name] = data[field.name][0]
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        for relation, details in data.get("relations", {}).items():
            rel_app_label, rel_model = relation.split(".")
            relation_model = ContentType.objects.get_by_natural_key(
                app_label=rel_app_label, model=rel_model
            ).model_class()

            # the related model is given under `obj` or, failing that, `subj`
            reld = details.get("obj", None) or details.get("subj", None)
            reld_app_label, reld_model = reld.split(".")
            related_content_type = ContentType.objects.get_by_natural_key(
                app_label=reld_app_label, model=reld_model
            )
            related_model = related_content_type.model_class()

            for related_uri in details["curies"]:
                try:
                    related_instance = create_object_from_uri(
                        uri=related_uri, model=related_model
                    )
                    if details.get("obj"):
                        # the related object is the object of the relation,
                        # so our instance is the subject
                        subj_object_id = instance.pk
                        subj_content_type = content_type
                        obj_object_id = related_instance.pk
                        obj_content_type = related_content_type
                    else:
                        # otherwise our instance is the object
                        obj_object_id = instance.pk
                        obj_content_type = content_type
                        subj_object_id = related_instance.pk
                        subj_content_type = related_content_type
                    rel, _ = relation_model.objects.get_or_create(
                        subj_object_id=subj_object_id,
                        subj_content_type=subj_content_type,
                        obj_object_id=obj_object_id,
                        obj_content_type=obj_content_type,
                    )
                    logger.debug(
                        "Created relation %s between %s and %s",
                        relation_model.name(),
                        rel.subj,
                        rel.obj,
                    )
                except Exception as e:
                    # best effort: one broken relation must not abort the import
                    logger.error(
                        "Could not create relation to %s due to %s", related_uri, e
                    )
        return instance