Coverage for apis_core/utils/DateParser.py: 58%

133 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-12-20 09:24 +0000

1import math 

2import re 

3from datetime import datetime, timedelta 

4 

5 

# Accepted spellings of one written date, tried in order. Delimiters have
# already been normalised to "." when these are applied. Each entry pairs a
# pattern with the meaning of its dot-separated components.
_DATE_FORMATS = (
    (r"\d{3,4}$", ("year",)),
    (r"\d{1,2}\.\d{3,4}$", ("month", "year")),
    (r"\d{1,2}\.\d{1,2}\.\d{3,4}$", ("day", "month", "year")),
    (r"\d{3,4}\.\d{1,2}\.?$", ("year", "month")),
    (r"\d{3,4}\.\d{1,2}\.\d{1,2}\.?$", ("year", "month", "day")),
)


def _last_day_of_month(month, year):
    """
    Return the number of the last day of a month, respecting leap years.

    :param month: month number, 1 to 12
    :param year: year number, used for February only
    :return: 28, 29, 30 or 31
    :raises ValueError: if ``month`` is not a number from 1 to 12
    """
    if month in (1, 3, 5, 7, 8, 10, 12):
        return 31
    if month in (4, 6, 9, 11):
        return 30
    if month == 2:
        # Gregorian rule: every 4th year, except centuries not divisible by 400.
        is_leap_year = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
        return 29 if is_leap_year else 28
    raise ValueError("Month " + str(month) + " does not exist.")


def _parse_single_date(date, *, ab=False, bis=False):
    """
    Parse one written date such as ``1900``, ``2.1900``, ``24.12.1900``,
    ``1900-12`` or ``1900/12/24``.

    :param date: sub string that is expected to contain exactly one date
    :param ab: interpret the date as the start of a range; missing month/day
        default to the first month/day
    :param bis: interpret the date as the end of a range; missing month/day
        default to the last month/day
    :return: a single ``datetime`` if ``ab`` or ``bis`` is set or the date is
        fully specified; otherwise a tuple ``(first, last)`` of ``datetime``
        covering the implicit range (a whole year or a whole month)
    :raises ValueError: if the string matches no known format, if both ``ab``
        and ``bis`` are set, or if the components do not form a valid date
    """
    # Treat blanks, "-", "/" and "\" all as the "." delimiter.
    date = date.replace(" ", "").replace("-", ".").replace("/", ".").replace("\\", ".")

    components = {"year": None, "month": None, "day": None}
    for pattern, names in _DATE_FORMATS:
        if re.match(pattern, date):
            # zip() drops the empty trailing piece produced by an optional final dot.
            for name, piece in zip(names, date.split(".")):
                components[name] = int(piece)
            break
    else:
        # No sensible interpretation found
        raise ValueError("Could not interpret date.")

    if ab and bis:
        # A single date cannot be both the start and the end of a range.
        raise ValueError("Could not interpret date.")

    year = components["year"]
    month = components["month"]
    day = components["day"]

    if not ab and not bis:
        if month is not None and day is not None:
            # Fully specified date.
            return datetime(year=year, month=month, day=day)
        # Only a year or a year and month were given (no format carries a day
        # without a month), so the value denotes an implicit range.
        first_month, last_month = (1, 12) if month is None else (month, month)
        # Computed first so that an invalid month is reported by the helper.
        last_day = _last_day_of_month(last_month, year)
        return (
            datetime(year=year, month=first_month, day=1),
            datetime(year=year, month=last_month, day=last_day),
        )

    # Exactly one of ab / bis is set: fill missing parts with the matching edge.
    if month is None:
        month = 1 if ab else 12
    if day is None:
        day = 1 if ab else _last_day_of_month(month=month, year=year)
    return datetime(year=year, month=month, day=day)


def _parse_iso_date(date_string):
    """
    Parse a ``YYYY-MM-DD`` string; surrounding blanks are ignored.

    :param date_string: the text of one comma separated slot
    :return: the parsed ``datetime`` or ``None`` if the slot is blank
    :raises ValueError: if the slot is not blank and not a valid date
    """
    date_string = date_string.strip()
    if date_string == "":
        return None
    date_string_split = date_string.split("-")
    try:
        return datetime(
            year=int(date_string_split[0]),
            month=int(date_string_split[1]),
            day=int(date_string_split[2]),
        )
    except Exception as err:
        raise ValueError("Invalid iso date: ", date_string) from err


def _parse_bracketed_dates(date_split_angle):
    """
    Parse the explicit ``<single>`` or ``<single, ab, bis>`` notation.

    :param date_split_angle: result of splitting the input on ``(<.*?>)``,
        containing at least one bracketed part
    :return: tuple ``(date_single, date_ab, date_bis)``; blank slots are ``None``
    :raises ValueError: on more than one bracket pair, on a slot count other
        than one or three, or on an invalid iso date
    """
    # re.split with one capture group yields 1 + 2 * matches items, so more
    # than three items means more than one pair of angle brackets.
    if len(date_split_angle) > 3:
        raise ValueError("Too many angle brackets.")

    # Strip the brackets themselves, then split into the comma separated slots.
    dates_iso = date_split_angle[1][1:-1].split(",")
    if len(dates_iso) not in (1, 3):
        raise ValueError(
            "Incorrect number of dates given. Within angle brackets only one or three (separated by commas) are allowed."
        )

    date_ab = None
    date_bis = None
    if len(dates_iso) == 3:
        date_ab = _parse_iso_date(dates_iso[1])
        date_bis = _parse_iso_date(dates_iso[2])
    date_single = _parse_iso_date(dates_iso[0])
    return date_single, date_ab, date_bis


def _parse_written_dates(date_string):
    """
    Parse the free notation: a single date, ``ab <date>``, ``bis <date>`` or
    ``ab <date> bis <date>`` (case-insensitive, blanks ignored).

    :param date_string: user input that contains no angle brackets
    :return: tuple ``(date_single, date_ab, date_bis)``. For a range the single
        date is the midpoint (rounded down to whole days); for an open range it
        equals the one given boundary.
    :raises ValueError: on unparsable or redundant dates, or if the start lies
        after the end
    """
    date_string = date_string.lower().replace(" ", "")

    date_single = None
    date_ab = None
    date_bis = None
    found_ab = False
    found_bis = False
    found_single = False

    # The capture group keeps the keywords in the result, so every keyword at
    # index i is always followed by its (possibly empty) operand at i + 1.
    tokens = re.split(r"(ab|bis)", date_string)
    for i, token in enumerate(tokens):
        if token == "ab":
            if found_ab or found_single:
                raise ValueError("Redundant dates found.")
            found_ab = True
            date_ab = _parse_single_date(tokens[i + 1], ab=True)
        elif token == "bis":
            if found_bis or found_single:
                raise ValueError("Redundant dates found.")
            found_bis = True
            date_bis = _parse_single_date(tokens[i + 1], bis=True)
        elif token != "" and not found_ab and not found_bis and not found_single:
            # A date without keyword; operands of ab / bis are skipped here
            # because their keyword already set the corresponding flag.
            found_single = True
            parsed = _parse_single_date(token)
            if isinstance(parsed, tuple):
                # Implicit range (whole year / whole month).
                date_ab, date_bis = parsed
            else:
                date_single = parsed

    if date_ab is not None and date_bis is not None:
        if date_ab > date_bis:
            raise ValueError("'ab-date' must be before 'bis-date' in time")
        # Use the midpoint of the range as the representative single date.
        days_delta_half = math.floor((date_bis - date_ab).days / 2)
        date_single = date_ab + timedelta(days=days_delta_half)
    elif date_ab is not None:
        # Only the start of a range is known; use it as the single date too.
        date_single = date_ab
    elif date_bis is not None:
        # Only the end of a range is known; use it as the single date too.
        date_single = date_bis

    return date_single, date_ab, date_bis


def parse_date(
    date_string: str,
) -> "tuple[datetime | None, datetime | None, datetime | None]":
    """
    Parse the written date field of an entity.

    Two notations are understood:

    * explicit iso dates in angle brackets, ``<YYYY-MM-DD>`` or
      ``<single, ab, bis>`` (any slot may be blank); text outside the brackets
      is ignored and the values are taken as given
    * free notation, e.g. ``1900``, ``2.1900``, ``24.12.1900``, ``ab 1900``,
      ``bis 1900-12`` or ``ab 1.1.1900 bis 1910``

    Parsing never raises. If the input cannot be interpreted, a message is
    printed and all three results are ``None``, so no partially parsed values
    are handed to the caller.

    :param date_string: the field value passed by a user
    :return: tuple ``(date_single, date_ab, date_bis)`` where ``date_single``
        is the precise date or the midpoint of a range, ``date_ab`` the start
        of the range and ``date_bis`` the end of the range; each may be ``None``
    """
    try:
        # Split on angle brackets to detect the explicit iso notation.
        date_split_angle = re.split(r"(<.*?>)", date_string)
        if len(date_split_angle) > 1:
            return _parse_bracketed_dates(date_split_angle)
        return _parse_written_dates(date_string)
    except Exception as e:
        # Best effort by design: report the problem and return nothing rather
        # than a half-filled result.
        print("Could not parse date: '", date_string, "' due to error: ", e)
        return None, None, None