Coverage for apis_core/utils/DateParser.py: 58%
133 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-20 09:24 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-20 09:24 +0000
1import math
2import re
3from datetime import datetime, timedelta
def parse_date(date_string: str) -> (datetime, datetime, datetime):
    """
    Parse the free-text date field of an entity.

    Two input styles are recognised:

    * explicit ISO dates inside one pair of angle brackets, either
      ``<YYYY-MM-DD>`` or ``<single, start, end>`` (any of the three may be
      left empty); everything outside the brackets is ignored,
    * written dates such as ``2000``, ``2.2000``, ``1.2.2000``, ``2000-02-01``,
      optionally prefixed by the keywords ``ab`` (start of range) and/or
      ``bis`` (end of range).

    Parsing is best-effort: any failure is reported on stdout and the
    function returns ``(None, None, None)`` instead of raising, so a caller
    never receives a half-parsed or inconsistent result.

    :param date_string : str :
        the field value passed by a user
    :return date_single : datetime or None :
        the precise date given by the user, or the median of the range.
    :return date_ab : datetime or None :
        starting date of a range, if a range was given (implicit or explicit).
    :return date_bis : datetime or None :
        ending date of a range, if a range was given (implicit or explicit).
    """
    try:
        return _parse_date_strict(date_string)
    except Exception as e:
        # Deliberate catch-all: this is the boundary that turns every parsing
        # problem (including non-string input) into an "unparsable" result.
        print("Could not parse date: '", date_string, "' due to error: ", e)
        return None, None, None


# Maps every accepted delimiter to "." and drops blanks, in one pass.
_DELIMITER_TABLE = str.maketrans({" ": None, "-": ".", "/": ".", "\\": "."})

# Accepted year / month / day layouts, tried in this order.
_DATE_PATTERNS = (
    # year
    re.compile(r"(?P<year>\d{3,4})$"),
    # month - year
    re.compile(r"(?P<month>\d{1,2})\.(?P<year>\d{3,4})$"),
    # day - month - year
    re.compile(r"(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{3,4})$"),
    # year - month
    re.compile(r"(?P<year>\d{3,4})\.(?P<month>\d{1,2})\.?$"),
    # year - month - day
    re.compile(r"(?P<year>\d{3,4})\.(?P<month>\d{1,2})\.(?P<day>\d{1,2})\.?$"),
)


def _last_day_of_month(month, year):
    """
    Return the number of the last day of ``month`` in ``year``.

    February follows the gregorian leap year rules.

    :param month : int
    :param year : int
    :return day : int
    :raises ValueError : if ``month`` is not in 1..12
    """
    if month in (1, 3, 5, 7, 8, 10, 12):
        return 31
    if month in (4, 6, 9, 11):
        return 30
    if month == 2:
        is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
        return 29 if is_leap_year else 28
    raise ValueError(f"Month {month} does not exist.")


def _parse_individual_date(date, ab=False, bis=False):
    """
    Parse one written date (a year, a month of a year, or a full date).

    :param date : str :
        sub string which potentially is a date
    :param ab : boolean : optional
        interpret an incomplete date as the start of a range
    :param bis : boolean : optional
        interpret an incomplete date as the end of a range
    :return datetime :
        if the date is complete, or if ``ab`` / ``bis`` selected the first /
        last day of the incomplete date
    :return tuple (datetime, datetime) :
        first and last day covered by an incomplete date when neither ``ab``
        nor ``bis`` is set; the caller decides how to use this implicit range
    :raises ValueError :
        if the string matches no known layout, if both ``ab`` and ``bis`` are
        set, or if the values do not form a valid calendar date
    """
    if ab and bis:
        # a single date cannot be both the start and the end of a range
        raise ValueError("Could not interpret date.")

    normalised = date.translate(_DELIMITER_TABLE)
    for pattern in _DATE_PATTERNS:
        match = pattern.match(normalised)
        if match:
            break
    else:
        # No sensical interpretation found
        raise ValueError("Could not interpret date.")

    fields = {name: int(value) for name, value in match.groupdict().items()}
    year = fields["year"]
    month = fields.get("month")
    day = fields.get("day")

    if day is not None:
        # Fully specified date; ab / bis make no difference.
        return datetime(year=year, month=month, day=day)

    # Incomplete date: it spans a whole year (no month) or a whole month.
    first_month, last_month = (1, 12) if month is None else (month, month)

    if ab:
        # starting date: first day of the covered span
        return datetime(year=year, month=first_month, day=1)

    last_day = _last_day_of_month(month=last_month, year=year)
    if bis:
        # ending date: last day of the covered span
        return datetime(year=year, month=last_month, day=last_day)

    # neither start nor end requested: hand back the implicit range
    return (
        datetime(year=year, month=first_month, day=1),
        datetime(year=year, month=last_month, day=last_day),
    )


def _parse_iso_date(iso_string):
    """
    Parse a ``YYYY-MM-DD`` string into a datetime.

    :raises ValueError : if the string is not a valid date in that layout
    """
    parts = iso_string.split("-")
    try:
        return datetime(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
    except (ValueError, IndexError, OverflowError) as err:
        raise ValueError(f"Invalid iso date: {iso_string}") from err


def _parse_bracketed_iso_dates(bracket_parts):
    """
    Parse the explicit ISO dates of a string that contains angle brackets.

    ``bracket_parts`` is the result of splitting the input on ``(<.*?>)``;
    the text outside the brackets is ignored. The dates are returned exactly
    as given, no median is derived from start and end date.

    :return tuple (date_single, date_ab, date_bis), each datetime or None
    :raises ValueError : on more than one bracket pair, a wrong number of
        comma separated dates, or an invalid ISO date
    """
    if len(bracket_parts) > 3:
        # more than one pair of angle brackets
        raise ValueError("Too many angle brackets.")

    # strip the angle brackets, then split into the comma separated dates
    iso_fields = bracket_parts[1][1:-1].split(",")
    if len(iso_fields) not in (1, 3):
        raise ValueError(
            "Incorrect number of dates given. Within angle brackets only one or three (separated by commas) are allowed."
        )

    date_ab = None
    date_bis = None
    if len(iso_fields) == 3:
        # three iso dates: the second and third are start and end date
        ab_string = iso_fields[1].strip()
        if ab_string != "":
            date_ab = _parse_iso_date(ab_string)
        bis_string = iso_fields[2].strip()
        if bis_string != "":
            date_bis = _parse_iso_date(bis_string)

    date_single = None
    single_string = iso_fields[0].strip()
    if single_string != "":
        date_single = _parse_iso_date(single_string)

    return date_single, date_ab, date_bis


def _parse_written_dates(date_string):
    """
    Parse a string without angle brackets: ``[date]`` or ``[ab date][bis date]``.

    :return tuple (date_single, date_ab, date_bis), each datetime or None
    :raises ValueError : on unparsable or redundant dates, or if the start
        date lies after the end date
    """
    normalised = date_string.lower().replace(" ", "")

    # Splitting on a capturing group yields
    # [leading text, keyword, text, keyword, text, ...].
    leading, *rest = re.split(r"(ab|bis)", normalised)

    date_single = None
    date_ab = None
    date_bis = None
    found_single = False
    found_ab = False
    found_bis = False

    if leading != "":
        # text before any keyword must be a date on its own
        found_single = True
        parsed = _parse_individual_date(leading)
        if isinstance(parsed, tuple):
            # incomplete date: it is an implicit range
            date_ab, date_bis = parsed
        else:
            date_single = parsed

    for keyword, value in zip(rest[0::2], rest[1::2]):
        if keyword == "ab":
            if found_ab or found_single:
                raise ValueError("Redundant dates found.")
            found_ab = True
            date_ab = _parse_individual_date(value, ab=True)
        else:
            if found_bis or found_single:
                raise ValueError("Redundant dates found.")
            found_bis = True
            date_bis = _parse_individual_date(value, bis=True)

    if date_ab is not None and date_bis is not None:
        if date_ab > date_bis:
            raise ValueError("'ab-date' must be before 'bis-date' in time")
        # the single date of a range is its median day (rounded down)
        date_single = date_ab + timedelta(days=(date_bis - date_ab).days // 2)
    elif date_ab is not None:
        # only the start of a range is known, use it as the single date too
        date_single = date_ab
    elif date_bis is not None:
        # only the end of a range is known, use it as the single date too
        date_single = date_bis

    return date_single, date_ab, date_bis


def _parse_date_strict(date_string):
    """
    Parse ``date_string`` like ``parse_date`` does, but raise on any failure.

    :return tuple (date_single, date_ab, date_bis), each datetime or None
    """
    # split for angle brackets, which carry explicit iso dates
    bracket_parts = re.split(r"(<.*?>)", date_string)
    if len(bracket_parts) > 1:
        return _parse_bracketed_iso_dates(bracket_parts)
    return _parse_written_dates(date_string)