From 21ee7c5fde7e87de751dbfc5008e8ae2e41a4238 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20P=C3=A9rez-Cerezo?=
Date: Fri, 22 Sep 2017 16:16:03 +0200
Subject: First working program

---
Reviewer notes (ignored by `git am` / `git apply`; not part of the commit):

 * This copy of the patch had lost its line breaks, its indentation and any
   angle-bracket markup. Line structure was rebuilt to match the hunk headers
   below (8 / 45 / 52 added lines); indentation was rebuilt from the Python
   syntax. The nesting of the category-header `if` inside the
   `if not i.category in cat` test in both __main__ blocks is assumed --
   TODO confirm against the repository.
 * The "<p>" / "</p>" wrapper passed to etree.fromstring() in mathemensa.py is
   a reconstruction of stripped markup -- TODO confirm the original tag.
 * The author's e-mail address in the From: header was stripped and could not
   be recovered.
 * mensa.py: get_food_items() never reads its `mensa` argument; the request
   URL is hard-coded to mensa-tu-hardenbergstra%C3%9Fe.
 * mathemensa.py: get_food_items() returns "" (not a list) on weekends; the
   __main__ loop tolerates this because iterating "" yields nothing.

 common.py     |  8 ++++++++
 mathemensa.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 mensa.py      | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+)
 create mode 100644 common.py
 create mode 100644 mathemensa.py
 create mode 100644 mensa.py

diff --git a/common.py b/common.py
new file mode 100644
index 0000000..3ebea36
--- /dev/null
+++ b/common.py
@@ -0,0 +1,8 @@
+# Common classes and methods.
+
+class Food :
+    def __init__(self,name, price="", category=None, veggie=False) :
+        self.name = name
+        self.price = price
+        self.category = category
+        self.veggie = veggie
diff --git a/mathemensa.py b/mathemensa.py
new file mode 100644
index 0000000..b4f97af
--- /dev/null
+++ b/mathemensa.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+import urllib2
+from lxml import etree
+from lxml.cssselect import CSSSelector
+
+import xml.sax.saxutils as saxutils
+import html5lib
+import datetime
+from common import Food
+
+def get_food_items() :
+    weekday = datetime.datetime.today().weekday()
+    if weekday > 4 :
+        print "Error: No food today"
+        return ""
+    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
+    headers = {'User-Agent': user_agent}
+
+    req = urllib2.Request('http://personalkantine.personalabteilung.tu-berlin.de/', headers=headers)
+    response = urllib2.urlopen(req)
+    the_page = response.read()
+    document = html5lib.parse(the_page, treebuilder="lxml")
+    sel = CSSSelector('.Menu__accordion')
+    fl = []
+    for k in sel(document)[0][weekday] :
+        if k.tag.endswith("ul") :
+            for j in k :
+                price = j[1].text
+                name = j[0].text + etree.fromstring("<p>"+etree.tostring(j).split("\n")[2].split("<")[0]+"</p>").text.strip() # really extremely dirty hack
+                veg = 0
+                if "(v)" in name or u"Gemüseplatte" in name :
+                    veg = 1
+                fl.append(Food(name, price, u"Menü", veg))
+    return fl
+if __name__ == "__main__":
+    food = get_food_items()
+    cat = []
+    vegkeys = [ "", "Vegetarian", "Vegan" ]
+    for i in food:
+        if not i.category in cat :
+            cat.append(i.category)
+            if not i.category == None :
+                print i.category
+        print "\t" + i.name.ljust(80) + "\t"+ i.price.ljust(20) + vegkeys[i.veggie]
diff --git a/mensa.py b/mensa.py
new file mode 100644
index 0000000..1ecef11
--- /dev/null
+++ b/mensa.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2017 Gabriel Perez-Cerezo
+
+import urllib2
+from lxml import etree
+from lxml.cssselect import CSSSelector
+import html5lib
+from common import Food
+
+def get_food_items(mensa="mensa-tu-hardenbergstra%C3%9Fe", ignore_nudelauswahl=False) :
+    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
+    headers = {'User-Agent': user_agent}
+
+    req = urllib2.Request('https://www.stw.berlin/mensen/mensa-tu-hardenbergstra%C3%9Fe.html', headers=headers)
+    response = urllib2.urlopen(req)
+    the_page = response.read()
+    document = html5lib.parse(the_page, treebuilder="lxml")
+    groupsel = CSSSelector('.splGroupWrapper')
+    groups = [e for e in groupsel(document)]
+    fl = []
+    for i in groups :
+        name = CSSSelector('.splGroup')(i)[0].text
+        sel = CSSSelector('.splMeal')
+        meals = [e for e in sel(i)]
+        for m in meals :
+            namesel = CSSSelector('.bold')
+            nm = namesel(m)[0].text
+            if ignore_nudelauswahl and "Nudelauswahl" in nm:
+                continue
+            pricesel = CSSSelector('.col-md-3')
+            veg = 0
+            if len(pricesel(m)[0]) >= 2 :
+                if "15" in pricesel(m)[0][1].attrib["src"] :
+                    veg = 2
+                elif "1.png" in pricesel(m)[0][1].attrib["src"] :
+                    veg = 1
+            price = pricesel(m)[-1].text.strip()
+            fl.append(Food(nm, price, name, veg))
+    return fl
+
+
+
+# format:
+if __name__ == "__main__":
+    food = get_food_items("", ignore_nudelauswahl=True)
+    cat = []
+    vegkeys = [ "", "Vegetarian", "Vegan" ]
+    for i in food:
+        if not i.category in cat :
+            cat.append(i.category)
+            if not i.category == None :
+                print i.category
+        print "\t" + i.name.ljust(80) + "\t"+ i.price.ljust(20) + vegkeys[i.veggie]
-- 
cgit v1.2.3