From 7355b3e3503ca95e1ee11ebb814c3f2551143a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20P=C3=A9rez-Cerezo?= Date: Thu, 1 Mar 2018 17:47:10 +0100 Subject: Bumped version to 0.3, added html formatter --- mensa/backends/studentenwerk.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'mensa/backends/studentenwerk.py') diff --git a/mensa/backends/studentenwerk.py b/mensa/backends/studentenwerk.py index a8f6c2b..f916096 100644 --- a/mensa/backends/studentenwerk.py +++ b/mensa/backends/studentenwerk.py @@ -8,33 +8,28 @@ import html5lib from mensa.base import * from yapsy.IPlugin import IPlugin import multiprocessing - +import datetime from yapsy import NormalizePluginNameForModuleName as normalize -mensenliste = {"TU Hardenbergstraße" : "mensa-tu-hardenbergstra%C3%9Fe", "TU Marchstraße": "cafeteria-tu-marchstra%C3%9Fe", "TU Skyline": "cafeteria-tu-skyline", "TU Architektur": "cafeteria-tu-architektur", "TU Ackerstraße": "cafeteria-tu-ackerstra%C3%9Fe"} - - - -def pr_f(j) : - i,k = j - food = get_food_items(k, ignore_nudelauswahl=True) - return (i,"*"*20+i+"*"*20+"\n"+formt(food)) - class Studentenwerk(IPlugin) : - def register_restaurants (self) : - mensenliste = {"TU Hardenbergstraße" : "mensa-tu-hardenbergstra%C3%9Fe", "TU Marchstraße": "cafeteria-tu-marchstra%C3%9Fe", "TU Skyline": "cafeteria-tu-skyline", "TU Architektur": "cafeteria-tu-architektur", "TU Ackerstraße": "cafeteria-tu-ackerstra%C3%9Fe"} - for h,n in mensenliste.items() : - r = Restaurant(normalize(h), h, self, "dummy", [n]) - register_restaurant(r) - def get_food_items(self, mensa="mensa-tu-hardenbergstra%C3%9Fe", ignore_nudelauswahl=False) : + def fetch_page(self, mensa) : user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)' headers = {'User-Agent': user_agent} req = urllib.request.Request('https://www.stw.berlin/mensen/%s.html' % mensa, headers=headers) response = urllib.request.urlopen(req) the_page = response.read() + open("/tmp/the_page","w").write(str(the_page)) document = html5lib.parse(the_page, treebuilder="lxml") + return document + def register_restaurants (self) : + mensenliste = {"TU Hardenbergstraße" : "mensa-tu-hardenbergstra%C3%9Fe", "TU Marchstraße": "cafeteria-tu-marchstra%C3%9Fe", "TU Skyline": "cafeteria-tu-skyline", "TU Architektur": "cafeteria-tu-architektur", "TU Ackerstraße": "cafeteria-tu-ackerstra%C3%9Fe"} + for h,n in mensenliste.items() : + r = Restaurant(normalize(h), h, self, "dummy", [n]) + register_restaurant(r) + def get_food_items(self, mensa="mensa-tu-hardenbergstra%C3%9Fe", ignore_nudelauswahl=False) : + document = self.fetch_page(mensa) groupsel = CSSSelector('.splGroupWrapper') groups = [e for e in groupsel(document)] fl = [] @@ -60,7 +55,13 @@ class Studentenwerk(IPlugin) : price = pricesel(m)[-1].text.strip() fl.append(Food(nm, price, name, veg)) return fl - + def get_opening_hours(self, mensa) : + #### Rudiment of a function for getting opening hours. Does NOT work yet due to unknown issues. + doc = self.fetch_page(mensa) + groupsel = CSSSelector('div.col-xs-10') + groups = [e for e in groupsel(doc)] + print(groups) + return doc # format: -- cgit v1.2.3