-
Notifications
You must be signed in to change notification settings - Fork 7
/
feed.py
669 lines (567 loc) · 24.2 KB
/
feed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
# -*- coding: UTF-8 -*-
from collections import namedtuple
import datetime
import re
from xml.dom.minidom import Document
try:
from collections import OrderedDict
except ImportError: # support python 2.6
OrderedDict = dict
# Helpers to extract dates from strings
# -------------------------------------
date_format = re.compile(".*?(?P<datestr>(" +
"\d{2}(\d{2})?-[01]?\d-[0-3]?\d|" +
"[0-3]?\d\.[01]?\d\.\d{2}(\d{2})?|" +
"(?P<day>[0-3]?\d)\.? ?(?P<month>\S+) ?" +
"(?P<year>\d{2}(\d{2})?))).*",
re.UNICODE)
month_names = {
'januar': '01',
'january': '01',
'februar': '02',
'february': '02',
'märz': '03',
'maerz': '03',
'march': '03',
'april': '04',
'mai': '05',
'may': '05',
'juni': '06',
'june': '06',
'juli': '07',
'july': '07',
'august': '08',
'september': '09',
'oktober': '10',
'october': '10',
'november': '11',
'dezember': '12',
'december': '12',
}
def extractDate(text):
""" Tries to extract a date from a given :obj:`str`.
:param str text: Input date. A :obj:`datetime.date` object is passed
thought without modification.
:rtype: :obj:`datetime.date`"""
if type(text) is datetime.date:
return text
match = date_format.search(text.lower())
if not match:
raise ValueError('unsupported date format: {0}'.format(text.lower()))
# convert DD.MM.YYYY into YYYY-MM-DD
if match.group('month'):
if not match.group('month') in month_names:
raise ValueError('unknown month names: "{0}"'
.format(match.group('month')))
year = int(match.group('year'))
return datetime.date(
year if year > 2000 else 2000 + year,
int(month_names[match.group('month')]),
int(match.group('day')))
else:
parts = list(map(lambda v: int(v), '-'.join(reversed(
match.group('datestr').split('.'))).split('-')))
if parts[0] < 2000:
parts[0] += 2000
return datetime.date(*parts)
class extractWeekDates():
weekdays = {
0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6,
'Mon': 0,
'Montag': 0,
'Dienstag': 1,
'Mittwoch': 2,
'Donnerstag': 3,
'Freitag': 4,
'Samstag': 5,
'Sonntag': 6
}
def __init__(self, start, end=None):
self.monday = extractDate(start)
def __getitem__(self, value):
if type(value) not in [int, str]:
raise TypeError
if value not in self.weekdays:
raise ValueError
return self.monday + datetime.date.resolution * self.weekdays[value]
def __iter__(self):
for i in range(7):
yield self.monday + datetime.date.resolution * i
# Helpers for price handling
# -------------------------------------
#: The default compiled regex that is used by :func:`.convertPrice` and
#: :func:`.buildPrices`
default_price_regex = re.compile(r'(^|[^\d,.])(?P<euro>\d+)[,.]' +
r'(?P<cent>\d{2}|\d{1}(?=\s*€))',
re.UNICODE)
short_price_regex = re.compile(r'[^\d]*(?P<euro>\d+)\s*€[^\d]*', re.UNICODE)
none_price_regex = re.compile(r'^\s*-\s*$', re.UNICODE)
def convertPrice(variant, regex=None, short_regex=None, none_regex=none_price_regex):
''' Helper function to convert the given input price into integers (cents
count). :obj:`int`, :obj:`float` and :obj:`str` are supported
:param variant: Price
:param re.compile regex: Regex to convert str into price. The re should
contain two named groups `euro` and `cent`
:param re.compile short_regex: Short regex version (no cent part)
group `euro` should contain a valid integer.
:param re.compile none_regex: Regex to detect that no value is provided
if the input data is str, the normal regex do not match and this
regex matches `None` is returned.
:rtype: int/None'''
if isinstance(variant, int) and not isinstance(variant, bool):
return variant
elif isinstance(variant, float):
return round(variant * 100)
elif isinstance(variant, str):
match = (regex or default_price_regex).search(variant) \
or (short_regex or short_price_regex).match(variant)
if not match:
if none_regex and none_regex.match(variant):
return None
raise ValueError('Could not extract price: {0}'.format(variant))
return int(match.group('euro')) * 100 + \
int(match.groupdict().get('cent', '').ljust(2, '0'))
else:
raise TypeError('Unknown price type: {0!r}'.format(variant))
def buildPrices(data, roles=None, regex=default_price_regex,
default=None, additional={}):
''' Create a dictionary with price information. Multiple ways are
supported.
:rtype: :obj:`dict`: keys are role as str, values are the prices as
cent count'''
if isinstance(data, dict):
data = [(item[0], convertPrice(item[1])) for item in data.items()]
return dict([v for v in data if v[1] is not None])
elif isinstance(data, (str, float, int)) and not isinstance(data, bool):
if default is None:
raise ValueError('You have to call setAdditionalCharges '
'before it is possible to pass a string as price')
basePrice = convertPrice(data)
if basePrice is None:
return {}
prices = {default: basePrice}
for role in additional:
extraCharge = convertPrice(additional[role])
if extraCharge is None:
continue
prices[role] = basePrice + extraCharge
return prices
elif roles:
prices = {}
priceRoles = iter(roles)
for priceData in data:
price = convertPrice(priceData)
if price is None:
continue
prices[next(priceRoles)] = price
return prices
else:
raise TypeError('This type is for prices not supported!')
# Helpers for notes and legend handling
# -------------------------------------
#: Default regex str for :func:`buildLegend`
default_legend_regex = '(?P<name>(\d|[a-z])+)\)\s*' + \
'(?P<value>\w+((\s+\w+)*[^0-9)]))'
#: Default compiled regex for :func:`extractNotes`
default_extra_regex = re.compile('\((?P<extra>[0-9a-zA-Z]{1,2}'
'(?:,[0-9a-zA-Z]{1,2})*)\)', re.UNICODE)
def buildLegend(legend=None, text=None, regex=None, key=lambda v: v):
''' Helper method to build or extend a legend from a text. The given regex
will be used to find legend inside the text.
:param dict legend: Initial legend data
:param str text: Text from which should legend information extracted.
None means do no extraction.
:param str regex: Regex to find legend part inside the given text. The
regex should have a named group `name` (key) and a named group
`value` (value).
:param callable key: function to map the key to a legend key
:rtype: dict'''
if legend is None:
legend = {}
if text is not None:
for match in re.finditer(regex or default_legend_regex,
text, re.UNICODE):
legend[key(match.group('name'))] = match.group('value').strip()
return legend
def extractNotes(name, notes, legend=None, regex=None, key=lambda v: v):
''' This functions uses legend data to extract e.g. (1) references in a
meal name and add these in full text to the notes.
:param str name: The meal name
:param list notes: The initial list of notes for this meal
:param dict legend: The legend data. Use `None` to skip extraction. The
key is searched inside the meal name (with the given regex) and if
found the value is added to the notes list.
:param re.compile regex: The regex to find legend references in the
meal name. The regex must have exactly one group which identifies
the key in the legend data. If you pass None the
:py:data:`default_extra_regex` is used. Only compiled regex are
supported.
:param callable key: function to map the key to a legend key
:rtype: tuple with name and notes'''
if legend is None:
return name, notes
if regex is None:
regex = default_extra_regex
# extract note
for note in list(','.join(regex.findall(name)).split(',')):
if not note:
continue
note = key(note)
if note in legend:
if legend[note] not in notes:
notes.append(legend[note])
else:
print('could not find extra note "{0}"'.format(note))
# from notes from name
name = regex.sub('', name).replace('\xa0', ' ').replace(' ', ' ').strip()
return name, notes
class Feed(namedtuple('FeedRecord', ['name', 'priority', 'url', 'source', 'dayOfWeek', 'dayOfMonth', 'hour', 'minute', 'retry'])):
def toTag(self, output):
''' This methods returns all data of this feed as feed xml tag
:param output: XML Document to which the data should be added
:type output: xml.dom.DOMImplementation.createDocument
'''
feed = output.createElement('feed')
feed.setAttribute('name', self.name)
feed.setAttribute('priority', str(self.priority))
# schedule
schedule = output.createElement('schedule')
schedule.setAttribute('dayOfMonth', self.dayOfMonth)
schedule.setAttribute('dayOfWeek', self.dayOfWeek)
schedule.setAttribute('hour', self.hour)
schedule.setAttribute('minute', self.minute)
if self.retry:
schedule.setAttribute('retry', self.retry)
feed.appendChild(schedule)
# url
url = output.createElement('url')
url.appendChild(output.createTextNode(self.url))
feed.appendChild(url)
# source
if self.source:
source = output.createElement('source')
source.appendChild(output.createTextNode(self.source))
feed.appendChild(source)
return feed
# Base canteen with meal data
# ---------------------------
class BaseBuilder(object):
""" This class represents and stores all information
about OpenMensa canteens. It helps writing new
python parsers with helper and shortcuts methods.
So the complete object can be converted to a valid
OpenMensa v2 xml feed string. """
allowed_price_roles = ['pupil', 'student', 'employee', 'other']
def __init__(self, version=None):
self._days = {}
self._version = version
self._name = None
self._address = None
self._city = None
self._phone = None
self._email = None
self._location = None
self._availability = None
self.feeds = []
@property
def version(self):
return self._version
@version.setter
def version(self, version):
self._version = version
@property
def name(self):
return self._name
@name.setter
def name(self, name):
self._name = name
@property
def address(self):
return self._address
@address.setter
def address(self, address):
self._address = address
@property
def city(self):
return self._city
@city.setter
def city(self, city):
self._city = city
@property
def phone(self):
return self._phone
@phone.setter
def phone(self, phone):
self._phone = phone
@property
def email(self):
return self._email
@email.setter
def email(self, email):
self._email = email
def location(self, longitude, latitude):
self._location = (longitude, latitude)
@property
def availability(self):
return self._availability
@availability.setter
def availability(self, availability):
self._availability = availability
def define(self, **kwargs):
self.feeds.append(Feed(**kwargs))
def addMeal(self, date, category, name, notes=None, prices=None):
""" This is the main helper, it adds a meal to the
canteen. The following data are needed:
:param datetime.date date: Date for the meal
:param str category: Name of the meal category
:param str meal: Meal name.
:raises ValueError: if the meal name is empty or longer that 250 characters
:raises ValueError: if the price role is unknown
:raises ValueError: if the category name is empty
:raises ValueError: if note list contains empty note
:raises TypeError: if the price value is not an integer
Additional the following data are also supported:
:param notes: List of notes
:type notes: list
:param prices: Price of the meal; Every key must be a string for
the role of the persons who can use this tariff; The value is
the price in Euro Cents, The site of the OpenMensa project
offers more detailed information.
:type prices: dict"""
# check name:
if not len(name):
raise ValueError('Meal names must not be empty')
if len(name) > 250:
raise ValueError('Meal names must be shorter than 251 characters')
# check category:
if not len(category):
raise ValueError('Category names must not be empty')
# process notes
if notes:
for note in notes:
if not len(note):
raise ValueError('Note must not be empty. Left it out, if not needed')
# process prices:
if prices is None:
prices = {}
else:
for role in prices:
if role not in self.allowed_price_roles:
raise ValueError('Unknown price role "%s"' % role)
if not isinstance(prices[role], int):
raise TypeError('Unsupport price type - expect integer')
# ensure we have an entry for this date
date = self._handleDate(date)
if date not in self._days:
self._days[date] = OrderedDict()
# ensure we have a category element for this category
if category not in self._days[date]:
self._days[date][category] = []
# add meal into category:
self._days[date][category].append((name, notes or [], prices))
def setDayClosed(self, date):
""" Define that the canteen is closed on this date. If a day is closed,
all stored meals for this day will be removed.
:param date: Date of the day
:type date: datetime.date"""
self._days[self._handleDate(date)] = False
def clearDay(self, date):
""" Remove all stored information about this date (meals or closed
information).
:param date: Date of the day
:type date: datetime.date"""
date = self._handleDate(date)
if date in self._days:
del self._days[date]
def dayCount(self):
""" Return the number of dates for which information are stored.
:rtype: int"""
return len(self._days)
def hasMealsFor(self, date):
""" Checks whether for this day are information stored.
:param date: Date of the day
:type date: datetime.date
:rtype: bool"""
date = self._handleDate(date)
if date not in self._days or self._days[date] is False:
return False
return len(self._days[date]) > 0
@staticmethod
def _handleDate(date):
""" Internal method that is used to handle/convert input date. It
raises a :exc:`ValueError` if the type is no
:class:`datetime.date`. This method should be overwritten in
subclasses to support other date input types.
:param date: input to be handled/converted
:rtype: datetime.date"""
if type(date) is not datetime.date:
raise TypeError('Dates needs to be specified by datetime.date')
return date
# methods to create feed
# ----------------------
def toXML(self):
feed, document = self._createDocument()
if self.version is not None:
feed.appendChild(self._buildStringTag('version', self.version, document))
feed.appendChild(self.toTag(document))
return feed
def toXMLFeed(self):
""" Convert this cateen information into string
which is a valid OpenMensa v2 xml feed
:rtype: str"""
feed = self.toXML()
xml_header = '<?xml version="1.0" encoding="UTF-8"?>\n'
return xml_header + feed.toprettyxml(indent=' ')
@staticmethod
def _createDocument():
# create xml document
output = Document()
# build main openmensa element with correct xml namespaces
openmensa = output.createElement('openmensa')
openmensa.setAttribute('version', '2.1')
openmensa.setAttribute('xmlns', 'http://openmensa.org/open-mensa-v2')
openmensa.setAttribute('xmlns:xsi',
'http://www.w3.org/2001/XMLSchema-instance')
openmensa.setAttribute('xsi:schemaLocation',
'http://openmensa.org/open-mensa-v2 ' +
'http://openmensa.org/open-mensa-v2.xsd')
return openmensa, output
def toTag(self, output):
''' This methods adds all data of this canteen as canteen xml tag
to the given xml Document.
:meth:`toXMLFeed` uses this method to create the XML Feed. So there is
normally no need to call it directly.
:param output: XML Document to which the data should be added
:type output: xml.dom.DOMImplementation.createDocument
'''
# create canteen tag, which represents our data
canteen = output.createElement('canteen')
if self._name is not None:
canteen.appendChild(self._buildStringTag('name', self._name, output))
if self._address is not None:
canteen.appendChild(self._buildStringTag('address', self._address, output))
if self._city is not None:
canteen.appendChild(self._buildStringTag('city', self._city, output))
if self._phone is not None:
canteen.appendChild(self._buildStringTag('phone', self._phone, output))
if self._email is not None:
canteen.appendChild(self._buildStringTag('email', self._email, output))
if self._location is not None:
canteen.appendChild(self._buildLocationTag(self._location, output))
if self._availability is not None:
canteen.appendChild(self._buildStringTag('availability', self._availability, output))
# iterate above all feeds:
for feed in sorted(self.feeds, key=lambda v: v.priority):
canteen.appendChild(feed.toTag(output))
# iterate above all days (sorted):
for date in sorted(self._days.keys()):
day = output.createElement('day')
day.setAttribute('date', str(date))
if self._days[date] is False: # canteen closed
closed = output.createElement('closed')
day.appendChild(closed)
canteen.appendChild(day)
continue
# canteen is open
for categoryname in self._days[date]:
day.appendChild(self._buildCategoryTag(
categoryname, self._days[date][categoryname], output))
canteen.appendChild(day)
return canteen
@classmethod
def _buildStringTag(cls, tag_name, value, output):
tag = output.createElement(tag_name)
tag.appendChild(output.createTextNode(value))
return tag
@classmethod
def _buildLocationTag(cls, location, output):
tag = output.createElement('location')
tag.setAttribute('longitude', location[0])
tag.setAttribute('latitude', location[1])
return tag
@classmethod
def _buildCategoryTag(cls, name, data, output):
# skip empty categories:
if len(data) < 1:
return None
category = output.createElement('category')
category.setAttribute('name', name)
for meal in data:
category.appendChild(cls._buildMealTag(meal, output))
return category
@staticmethod
def _buildMealTag(mealData, output):
name, notes, prices = mealData
meal = output.createElement('meal')
# add name
nametag = output.createElement('name')
nametag.appendChild(output.createTextNode(name))
meal.appendChild(nametag)
# add notes:
for note in sorted(notes):
notetag = output.createElement('note')
notetag.appendChild(output.createTextNode(note))
meal.appendChild(notetag)
# add prices:
for role in sorted(prices):
price = output.createElement('price')
price.setAttribute('role', role)
price.appendChild(output.createTextNode("{euros}.{cents:0>2}"
.format(euros=prices[role] // 100,
cents=prices[role] % 100)))
meal.appendChild(price)
return meal
# Lazy version with auto extraction and convertion functions
# ----------------------------------------------------------
class LazyBuilder(BaseBuilder):
""" An extended builder class which uses a set of helper and
auto-converting functions to reduce common converting tasks
:ivar extra_regex: None: regex to be passed to extractNotes, Use `None`
to use default regex provided by this module.
"""
def __init__(self, *args, **kwargs):
super(LazyBuilder, self).__init__(*args, **kwargs)
self.legendData = None
#: function passed as key parameter to :py:func:`.buildLegend` and
#: :py:func:`.extractNotes`; use `lambda v: v.lower()` for
#: case-insensitive legend names (instance member)
self.legendKeyFunc = lambda v: v
self.extra_regex = None
self.additionalCharges = (None, {})
def setLegendData(self, *args, **kwargs):
""" Set or genernate the legend data from this canteen.
Uses :py:func:`.buildLegend` for genernating """
self.legendData = buildLegend(*args, key=self.legendKeyFunc, **kwargs)
def setAdditionalCharges(self, default, additional):
""" This is a helper function, which fast up the calculation
of prices. It is useable if the canteen has fixed
additional charges for special roles.
:param str default: specifies for which price role the price of
addMeal are.
:param dict additional: Defines the extra costs (value) for other
roles (key)."""
self.additionalCharges = (default, buildPrices(additional))
def addMeal(self, date, category, name, notes=None, prices=None,
roles=None):
""" Same as :py:meth:`.BaseBuilder.addMeal` but uses
helper functions to convert input parameters into needed types.
Meals names are auto-shortend to the allowed 250 characters.
The following paramer is new:
:param roles: Is passed as role parameter to :func:`buildPrices`
"""
if self.legendData: # do legend extraction
name, notes = extractNotes(name, notes or [],
legend=self.legendData,
key=self.legendKeyFunc,
regex=self.extra_regex)
prices = buildPrices(prices or {}, roles,
default=self.additionalCharges[0],
additional=self.additionalCharges[1])
if len(name) > 250:
name = name[:247] + '...'
super(LazyBuilder, self).addMeal(extractDate(date), category, name,
notes or [], prices)
@staticmethod
def _handleDate(date):
return extractDate(date)
OpenMensaCanteen = LazyBuilder