Coverage for apis_core/generic/importers.py: 0%
102 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-09-03 06:15 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-09-03 06:15 +0000
1import json
2import logging
3import urllib
4from functools import cache
6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
7from django.contrib.contenttypes.models import ContentType
8from django.core.exceptions import ImproperlyConfigured
9from django.db.utils import IntegrityError
11from apis_core.uris.models import Uri
12from apis_core.uris.utils import create_object_from_uri
13from apis_core.utils.rdf import get_something_from_uri
15logger = logging.getLogger(__name__)
class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that fails, it tries to parse it using JSON and
    then extracts the fields whose keys match the model field names.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # Model class the fetched data is mapped onto; set per instance in __init__.
    model = None
    # URI the data is fetched from, as returned by `clean_uri`; set in __init__.
    import_uri = None

    def __init__(self, uri, model):
        """Store `model` and the cleaned form of `uri` on the importer."""
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """Return the URI this importer fetches its data from."""
        return self.import_uri

    def clean_uri(self, uri):
        """Return `uri` normalized with `get_normalized_uri`."""
        return get_normalized_uri(uri)

    def request(self, uri):
        """
        Fetch the resource at `uri` and return the parsed data.

        The RDF parser (`get_something_from_uri`) is tried first. If it
        raises or returns nothing, the resource is downloaded and parsed
        as JSON instead. Errors in either step are logged at debug level
        and otherwise ignored (best effort).

        Results (including the empty result) are memoized per importer
        instance and per URI. The memo lives on the instance rather than
        in a `functools.cache` on the method, because the latter keeps
        every importer instance alive for the lifetime of the process.

        Returns the parsed data, or an empty dict if nothing could be
        fetched.
        """
        # Imported locally: a bare `import urllib` does not guarantee that
        # the `urllib.request` submodule has been loaded.
        from urllib.request import urlopen

        memo = self.__dict__.setdefault("_request_cache", {})
        if uri in memo:
            return memo[uri]

        data = None
        # We first try to use the RDF parser
        try:
            data = get_something_from_uri(uri, [self.model])
        except Exception as e:
            logger.debug(e)
        # If there is no data yet, try parsing JSON
        if not data:
            try:
                # The context manager closes the HTTP response again.
                with urlopen(uri) as response:
                    data = json.loads(response.read())
            except Exception as e:
                logger.debug(e)
        # Remember the fetched data or an empty dict if there is none
        memo[uri] = data or {}
        return memo[uri]

    def mangle_data(self, data):
        """Hook for subclasses to post-process fetched data; returns it as is."""
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if no data is left at the end.
        """
        data = self.request(self.import_uri)
        data = self.mangle_data(data)
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: data[key] for key in data if key in modelfields}
        if not data:
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {data}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy fetched values onto an existing `instance` and save it.

        `fields` is either the string "__all__" (use every fetched field)
        or an iterable of field names. A field is only set if the instance
        has an attribute of that name and the fetched data holds a
        non-empty value for it; the first element of that value is used.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = data.keys()
        for field in fields:
            # NOTE(review): values are indexed with [0], so they are
            # presumably lists of candidates - confirm against the parser.
            # Empty values are skipped, as `create_instance` does, instead
            # of failing with an IndexError.
            if hasattr(instance, field) and data.get(field):
                setattr(instance, field, data[field][0])
        instance.save()

    def create_instance(self):
        """
        Return a model instance for the import URI, creating it if needed.

        If one of the normalized "same_as" URIs in the fetched data is
        already stored as a `Uri`, the object it points to is reused.
        Otherwise a new instance of `self.model` is created from the first
        element of every non-empty value whose key is a model field name.
        All "same_as" URIs are then attached to the instance, and the
        entries under "relations" are turned into relation objects.

        Failures while creating a single relation are logged and skipped.
        Malformed relation keys or unknown content types still raise.

        Raises `IntegrityError` if the "same_as" URIs point to more than
        one existing object.
        """
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = [get_normalized_uri(uri) for uri in data.get("same_as", [])]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    "This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            attributes = {
                field.name: data[field.name][0]
                for field in self.model._meta.fields
                if data.get(field.name, False)
            }
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        for relation, details in data.get("relations", {}).items():
            # Relation keys have the form "<app_label>.<model>".
            rel_app_label, rel_model = relation.split(".")
            relation_model = ContentType.objects.get_by_natural_key(
                app_label=rel_app_label, model=rel_model
            ).model_class()

            # An "obj" entry means the imported instance is the subject and
            # the related object is the object of the relation; with only a
            # "subj" entry the roles are swapped.
            instance_is_subject = bool(details.get("obj"))
            reld = details.get("obj", None) or details.get("subj", None)
            reld_app_label, reld_model = reld.split(".")
            related_content_type = ContentType.objects.get_by_natural_key(
                app_label=reld_app_label, model=reld_model
            )
            related_model = related_content_type.model_class()

            for related_uri in details["curies"]:
                try:
                    related_instance = create_object_from_uri(
                        uri=related_uri, model=related_model
                    )
                    if instance_is_subject:
                        subj_object_id = instance.pk
                        subj_content_type = content_type
                        obj_object_id = related_instance.pk
                        obj_content_type = related_content_type
                    else:
                        obj_object_id = instance.pk
                        obj_content_type = content_type
                        subj_object_id = related_instance.pk
                        subj_content_type = related_content_type
                    rel, _ = relation_model.objects.get_or_create(
                        subj_object_id=subj_object_id,
                        subj_content_type=subj_content_type,
                        obj_object_id=obj_object_id,
                        obj_content_type=obj_content_type,
                    )
                    logger.debug(
                        "Created relation %s between %s and %s",
                        relation_model.name(),
                        rel.subj,
                        rel.obj,
                    )
                except Exception as e:
                    # One broken relation must not abort the whole import.
                    logger.error(
                        "Could not create relation to %s due to %s", related_uri, e
                    )
        return instance