• R/O
  • SSH

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revisão: 0fa41a222be5999485ee73d7b3a1d59cf0a18269 (tree)
Hora: 2019-08-08 20:20:52
Autor: Dan Villiom Podlaski Christiansen <dan@mage...>
Committer: Dan Villiom Podlaski Christiansen

Mensagem de Log

kill old food

Mudança Sumário

  • delete: diner-deluxe.py

Diff

diff -r 338edd509b14 -r 0fa41a222be5 diner-deluxe.py
--- a/diner-deluxe.py Tue Aug 06 11:33:22 2019 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
1-#!/usr/bin/env python3.7
2-#
3-# The employees of Magenta ApS wrote this file.
4-#
5-# As long as you retain this notice you can do whatever you want with
6-# this stuff. If we meet some day, and you think this stuff is worth
7-# it, you can buy us a beer in return.
8-#
9-# Dan Villiom Podlaski Christiansen (channeling Poul-Henning Kamp)
10-#
11-
12-import datetime
13-import json
14-import locale
15-import pathlib
16-import sys
17-
18-import click
19-import consolemd
20-import dateparser
21-import pdf2image
22-import pyquery
23-import pytesseract
24-import regex
25-import requests
26-
27-DEFAULT_URL = 'http://www.firma-catering.dk/frokostordning/ugens-menu'
28-ICON_URL = (
29- 'https://git.magenta.dk/uploads/-/system/project/avatar/36/clipart.png'
30-)
31-
32-USER_AGENT = 'Mozilla/5.0 FoodBot/1337 (+https://git.magenta.dk/dan/food)'
33-
34-
35-def get_pdf_url(weeknum, url):
36- r = requests.get(url, headers={'User-Agent': USER_AGENT})
37- r.raise_for_status()
38-
39- doc = pyquery.PyQuery(r.content)
40- doc.make_links_absolute(url)
41-
42- for el in doc.find('a.ugemenu'):
43- text = ''.join(el.itertext())
44-
45- if text.lower() == f'uge {weeknum}':
46- return el.get('href')
47-
48- return None
49-
50-
51-def get_menu(dt, url):
52- locale.setlocale(locale.LC_ALL, 'da_DK.UTF-8')
53-
54- weeknum = str(int(dt.strftime('%V')))
55- weekday = dt.strftime('%A')
56- month = dt.strftime('%b').rstrip('.')
57-
58- topdir = pathlib.Path(__file__).parent
59- cachedir = topdir / 'cache'
60- pdfcache = cachedir / f'week_{weeknum}.pdf'
61- txtcache = cachedir / f'week_{weeknum}.txt'
62-
63- spellbugs = json.loads(topdir.joinpath('spellbugs.json').read_text())
64-
65- pretty_date = (
66- dt.strftime('%c').replace(dt.strftime('%X '), '').replace(' ', ' ')
67- )
68-
69- if not pdfcache.exists():
70- pdf_url = get_pdf_url(weeknum, url)
71-
72- if not pdf_url:
73- return
74-
75- with requests.get(
76- pdf_url, stream=True, headers={'User-Agent': USER_AGENT}
77- ) as r:
78- r.raise_for_status()
79-
80- pdfcache.parent.mkdir(parents=True, exist_ok=True)
81- pdfcache.write_bytes(r.content)
82-
83- if not txtcache.exists():
84- images = pdf2image.convert_from_path(pdfcache, dpi=450)
85-
86- txtcache.write_text(
87- '\n'.join(
88- pytesseract.image_to_string(
89- image, lang='dan', config='--psm 6'
90- )
91- for image in images
92- )
93- )
94-
95- first = True
96-
97- for line in txtcache.read_text().splitlines():
98- if regex.match(f'^{weekday}', line, flags=regex.IGNORECASE):
99- if first:
100- yield f'[Frokostmenuen]({url}) for {pretty_date} er:'
101- yield ''
102-
103- first = False
104-
105- # gah
106- for needle, better_needle in spellbugs.items():
107- line = line.replace(needle, better_needle)
108-
109- line = regex.sub(r':(?=[^ ])', ': ', line)
110- line = regex.sub(
111- f'^{weekday} ', '', line, count=1, flags=regex.IGNORECASE
112- )
113- line = regex.sub(
114- r'^[\d/]+ ?', '', line, count=1, flags=regex.IGNORECASE
115- )
116- line = regex.sub(
117- f'^{month} ', '', line, count=1, flags=regex.IGNORECASE
118- )
119- line = regex.sub(f'^— ', '', line, count=1)
120-
121- line = regex.sub(
122- r'^(\p{General_Category=Uppercase_Letter}\w+) '
123- r'([\d\p{General_Category=Uppercase_Letter}])',
124- r'\1: \2',
125- line,
126- count=1,
127- )
128-
129- line = regex.sub(r'^([^:]+:)', r'*\1*', line, count=1)
130-
131- yield line
132-
133-
134-@click.command()
135-@click.pass_context
136-@click.argument('url', default=DEFAULT_URL)
137-@click.option('-c', '--channel', default='test')
138-@click.option('-u', '--json-url')
139-@click.option('-d', '--date', metavar='DATE')
140-def main(ctxt, date, url, json_url, channel):
141- if date is not None:
142- dt = dateparser.parse(
143- date, settings={'DATE_ORDER': 'DMY', 'PREFER_DATES_FROM': 'future'}
144- )
145- else:
146- dt = datetime.datetime.now()
147-
148- if dt is None:
149- click.echo(f'error: not a valid date: {date}', err=True)
150- raise click.exceptions.Exit(1)
151-
152- menu = '\n'.join(get_menu(dt, url))
153-
154- if not menu:
155- click.echo(f'error: no menu for {dt.date()}!', err=True)
156- raise click.exceptions.Exit(1)
157- elif not json_url:
158- if sys.stdout.isatty():
159- menu = consolemd.Renderer().render(menu)
160-
161- click.echo(menu)
162- else:
163- r = requests.post(
164- json_url,
165- json={
166- 'text': menu,
167- 'channel': f'#{channel}',
168- 'from': 'Madbåtten',
169- 'icon_url': ICON_URL,
170- },
171- )
172-
173- r.raise_for_status()
174-
175-
176-if __name__ == '__main__':
177- main()