Coverage for apis_core/generic/importers.py: 0%
81 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-10-10 13:36 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-10-10 13:36 +0000
1import json
2import logging
3import urllib
4from functools import cache
6from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
7from django.contrib.contenttypes.models import ContentType
8from django.core.exceptions import ImproperlyConfigured
9from django.db.utils import IntegrityError
11from apis_core.uris.models import Uri
12from apis_core.utils.rdf import get_something_from_uri
14logger = logging.getLogger(__name__)
class GenericModelImporter:
    """
    A generic importer class which provides methods for
    importing data from a URI and creating a model instance from it.

    By default, it fetches a resource and first tries to parse it using
    our RDF parser. If that yields no data, it tries to parse the
    resource as JSON. The fields whose keys match the model field names
    are then extracted.
    Projects can inherit from this class and override the default
    methods or simply write their own from scratch.
    """

    # The model class instances are created from; set by `__init__`.
    model = None
    # The normalized URI the data is fetched from; set by `__init__`.
    import_uri = None

    def __init__(self, uri, model):
        """Store `model` and the cleaned (see `clean_uri`) form of `uri`."""
        self.model = model
        self.import_uri = self.clean_uri(uri)

    @property
    def get_uri(self):
        """The (cleaned) URI this importer fetches its data from."""
        return self.import_uri

    def clean_uri(self, uri):
        """Return `uri` passed through `get_normalized_uri`."""
        return get_normalized_uri(uri)

    def request(self, uri):
        """
        Fetch and parse the resource at `uri`.

        The RDF parser is tried first; if it raises or returns nothing,
        the resource is downloaded and parsed as JSON. Errors of either
        attempt are only logged (level DEBUG), never raised.

        Results are memoized per importer instance and per URI, so
        repeated calls with the same URI do not hit the network again.
        The memo lives on the instance (not in a module-global
        `functools.cache`), so it is released together with the importer.

        Returns the parsed data, or an empty dict if nothing could be
        fetched.
        """
        # Create the memo lazily, so subclasses that override `__init__`
        # without calling `super().__init__()` keep working.
        memo = getattr(self, "_request_cache", None)
        if memo is None:
            memo = self._request_cache = {}
        if uri in memo:
            return memo[uri]

        data = None
        # We first try to use the RDF parser
        try:
            data = get_something_from_uri(
                uri,
                [self.model],
            )
        except Exception as e:
            logger.debug(e)
        # If there is no data yet, try parsing JSON
        if not data:
            # A bare `import urllib` does not guarantee that the
            # `urllib.request` submodule is loaded, so import it explicitly.
            from urllib.request import urlopen

            try:
                # The context manager closes the connection again.
                with urlopen(uri) as response:
                    data = json.loads(response.read())
            except Exception as e:
                logger.debug(e)
        # Remember the fetched data or an empty dict if there is none
        memo[uri] = data or {}
        return memo[uri]

    def mangle_data(self, data):
        """
        Hook for post-processing the fetched data.

        The default implementation returns `data` unchanged.
        """
        return data

    def get_data(self, drop_unknown_fields=True):
        """
        Fetch the data using the `request` method and
        mangle it using the `mangle_data` method.

        If the `drop_unknown_fields` argument is True,
        remove all fields from the data dict that do not
        have an equivalent field in the model.

        Raises `ImproperlyConfigured` if no data is left. The error
        message contains the data as it was before unknown fields
        were dropped.
        """
        fetched = self.mangle_data(self.request(self.import_uri))
        data = fetched
        if drop_unknown_fields:
            # we are dropping all fields that are not part of the model
            modelfields = {field.name for field in self.model._meta.fields}
            data = {key: fetched[key] for key in fetched if key in modelfields}
        if not data:
            raise ImproperlyConfigured(
                f"Could not import {self.import_uri}. Data fetched was: {fetched}"
            )
        return data

    def import_into_instance(self, instance, fields="__all__"):
        """
        Copy fetched values onto an existing `instance` and save it.

        `fields` is an iterable of attribute names to update; the default
        "__all__" means every key of the fetched data. Names that are not
        an attribute of `instance` or not part of the data are skipped,
        as are fields whose fetched value is empty (this mirrors
        `create_instance`). For every remaining field the first element
        of the fetched value is assigned.
        """
        data = self.get_data()
        if fields == "__all__":
            fields = data.keys()
        for field in fields:
            if not hasattr(instance, field) or field not in data:
                continue
            values = data[field]
            # An empty value has no first element to assign.
            if values:
                setattr(instance, field, values[0])
        instance.save()

    def create_instance(self):
        """
        Return a model instance for the import URI, creating it if needed.

        If one of the (normalized) `same_as` URIs of the fetched data is
        already stored as a `Uri`, the object that `Uri` points to is
        reused. Otherwise a new instance of `self.model` is created, using
        the first element of every non-empty fetched value whose key is a
        model field name. All `same_as` URIs are then attached to the
        instance.

        Raises `IntegrityError` if the `same_as` URIs point to more than
        one existing object.
        """
        logger.debug("Create instance from URI %s", self.import_uri)
        data = self.get_data(drop_unknown_fields=False)
        instance = None
        same_as = [get_normalized_uri(uri) for uri in data.get("same_as", [])]
        if sa := Uri.objects.filter(uri__in=same_as):
            root_set = {s.content_object for s in sa}
            if len(root_set) > 1:
                raise IntegrityError(
                    f"Multiple objects found for sameAs URIs {data['same_as']}. "
                    "This indicates a data integrity problem as these URIs should be unique."
                )
            instance = sa.first().content_object
            logger.debug("Found existing instance %s", instance)
        if not instance:
            attributes = {
                field.name: data[field.name][0]
                for field in self.model._meta.fields
                if data.get(field.name, False)
            }
            instance = self.model.objects.create(**attributes)
            logger.debug("Created instance %s from attributes %s", instance, attributes)
        content_type = ContentType.objects.get_for_model(instance)
        for uri in same_as:
            Uri.objects.get_or_create(
                uri=uri, content_type=content_type, object_id=instance.id
            )
        if "relations" in data:
            # NOTE(review): presumably this attribute is picked up while
            # saving (e.g. by a signal handler) to create the relations;
            # that code is not part of this module - confirm.
            instance.create_relations_to_uris = data["relations"]
            instance.save()
        return instance