#! /usr/bin/env python3
"""Mirror the events listed on a wiki page into a CalDAV calendar.

The script fetches an HTML page (default: https://hsmr.cc/Category/Events),
extracts every line of the form ``YYYY-MM-DD, HH:MM Uhr: <description>``,
wipes the target CalDAV calendar and re-creates one event per parsed line.
It repeats this forever, sleeping between runs.

Configuration is read from environment variables:

* ``URL``                   page to scrape
* ``SLEEP_TIME``            seconds between two runs (default 60)
* ``CALDAV_URL``            CalDAV server URL (required)
* ``CALDAV_USERNAME``       CalDAV user (required)
* ``CALDAV_PASSWORD``       CalDAV password (required)
* ``CALDAV_CALENDAR_NAME``  calendar to overwrite (default ``hsmr events``)
"""
import requests
import os
import re
import caldav
import sys
import hashlib
from time import sleep
from signal import signal, SIGTERM, SIGINT
from icalendar import Event, Calendar as iCalCalendar
from caldav.objects import Calendar, SynchronizableCalendarObjectCollection
import datetime
from typing import Iterable, Optional, Tuple, Iterator
# NOTE: this import intentionally comes after ``import datetime`` so that the
# name ``datetime`` refers to the class, not the module, in the code below.
from datetime import datetime, timezone
from bs4 import BeautifulSoup as bs
from dataclasses import dataclass


class FetchException(Exception):
    """Raised when the events page answers with a non-200 status code."""


class ParseError(Exception):
    """Raised when raw iCalendar data cannot be turned into an HSMREvent."""


def _fetch_page(url: str, timeout: float = 30.0) -> str:
    """Download ``url`` and return the response body as text.

    Args:
        url: address of the page to download.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the update loop forever.

    Raises:
        FetchException: if the server answers with a status other than 200.
        requests.RequestException: on connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise FetchException(
            f"error fetching url: {url} status_code: {response.status_code}"
        )
    return response.text


@dataclass
class HSMREvent:
    """A single event: when it happens, what it is and where to read more."""

    date: datetime
    what: str
    link: str

    def uid(self):
        """Return a stable iCalendar UID derived from date and description."""
        sha1 = hashlib.sha1(f'{self.date.isoformat()}-{self.what}'.encode())
        return f'{sha1.hexdigest()}@hsmr.cc'

    def save(self, calendar: iCalCalendar):
        """Append this event as a new component to ``calendar``."""
        uid = self.uid()
        event = Event()
        event.add('uid', uid)
        # RFC 5545 requires DTSTAMP to be expressed in UTC, so use an aware
        # UTC timestamp instead of a naive local one.
        event.add('dtstamp', datetime.now(timezone.utc))
        event.add('dtstart', self.date)
        event.add('summary', self.what)
        event.add('url', self.link)
        calendar.add_component(event)

    @staticmethod
    def from_caldav(data: str) -> 'HSMREvent':
        """Build an HSMREvent from raw iCalendar event data.

        Raises:
            ParseError: if ``data`` is not parseable or carries no DTSTART.
        """
        try:
            event = Event.from_ical(data)
        except ValueError as err:
            raise ParseError(
                f'ERROR: could not parse ical event, raw data: {data}'
            ) from err
        dtstart = event.get('dtstart')
        if dtstart is None:
            raise ParseError(
                f'ERROR: could not parse ical event, raw data: {data}'
            )
        date = dtstart.dt
        what = str(event.get('summary'))
        url = str(event.get('url'))
        # The original passed an undefined name ``link`` here (NameError).
        return HSMREvent(date, what, url)


# groups: year, month, day, hour, minute, description (rest of the line)
isodate_rgx = r"^(\d{4})-(\d{2})-(\d{2}),?\s*(\d{2}):(\d{2})\s*[uU]hr:\s*(.*)$"
_ISODATE_RE = re.compile(isodate_rgx)


def _find_events(raw_page: str) -> Iterator[HSMREvent]:
    """Yield one HSMREvent for every event line found in ``raw_page``.

    A line counts as an event line when it matches ``isodate_rgx``. Lines
    with an impossible date or without a ``wikilink`` anchor carrying an
    ``href`` are reported on stdout and skipped.
    """
    for line in raw_page.split('\n'):
        match = _ISODATE_RE.match(line)
        if match is None:
            continue

        # Build the date from the regex groups. The pattern accepts an
        # optional comma and flexible whitespace, which a fixed strptime
        # format would reject even though the line matched.
        year, month, day, hour, minute = (int(g) for g in match.groups()[:5])
        try:
            date = datetime(year, month, day, hour, minute)
        except ValueError:
            print(f"WARNING: invalid date: {line}, skipping")
            continue

        soup = bs(line, features='lxml')
        # Split only once so a description that itself contains "Uhr:"
        # stays intact instead of breaking the unpacking.
        what = re.split(r"[uU]hr:", soup.text, maxsplit=1)[-1]
        what = what.strip().replace('\n', '')

        link = soup.find(class_='wikilink', href=True)
        if link is None:
            print(f"WARNING: could not find link: {line}, skipping")
            continue
        yield HSMREvent(date, what, link['href'])


def _fetch_calendar() -> Calendar:
    """Connect to the CalDAV server and return the configured calendar.

    Exits the process with status 1 when one of the required environment
    variables CALDAV_URL, CALDAV_USERNAME or CALDAV_PASSWORD is missing.
    """
    caldav_url = os.environ.get('CALDAV_URL')
    username = os.environ.get('CALDAV_USERNAME')
    password = os.environ.get('CALDAV_PASSWORD')
    calendar_name = os.environ.get('CALDAV_CALENDAR_NAME', 'hsmr events')
    if any(value is None for value in (caldav_url, username, password)):
        print(
            'ERROR: please set CALDAV_URL, CALDAV_USERNAME and CALDAV_PASSWORD env vars'
        )
        sys.exit(1)
    print('fetch old calendar data')
    client = caldav.DAVClient(url=caldav_url, username=username, password=password)
    principal = client.principal()
    calendar = principal.calendar(calendar_name)
    return calendar


def _delete_all_from_cal(sync_obj: SynchronizableCalendarObjectCollection) -> int:
    """Delete every event of the calendar behind ``sync_obj``.

    Returns:
        The number of deleted events.
    """
    print('nuking old calendar')
    deleted = 0
    for event in sync_obj.calendar.events():
        event.delete()
        deleted += 1
    return deleted


def _insert_all_into_cal(
    events: Iterable[HSMREvent], sync_obj: SynchronizableCalendarObjectCollection
) -> int:
    """Add each of ``events`` to the calendar behind ``sync_obj``.

    Every event is wrapped in its own iCalendar container before upload.

    Returns:
        The number of inserted events.
    """
    print('inserting new events')
    found = 0
    for event in events:
        caldata = iCalCalendar()
        caldata.add('prodid', '-//hsmr.cc//hsmr-events2caldav//en_DK')
        caldata.add('version', '2.0')
        event.save(caldata)
        sync_obj.calendar.add_event(caldata)
        found += 1
    return found


def update(url: str):
    """Replace the calendar content with the events scraped from ``url``."""
    raw_page = _fetch_page(url)
    # Parse everything up front: _find_events is a lazy generator, and a
    # failure while parsing must happen before the old calendar is wiped,
    # not halfway through re-filling it.
    events = list(_find_events(raw_page))
    calendar = _fetch_calendar()
    sync_obj = calendar.objects(load_objects=True)
    deleted = _delete_all_from_cal(sync_obj)
    print(f"nuked {deleted} events")
    found = _insert_all_into_cal(events, sync_obj)
    print(f"found {found} events")


def main():
    """Run ``update`` in an endless loop until SIGINT or SIGTERM arrives."""
    url = os.environ.get('URL', "https://hsmr.cc/Category/Events")
    sleep_time = int(os.environ.get('SLEEP_TIME', 60))

    def handler(*args, **kwargs):
        # Exit cleanly (status 0) when asked to stop.
        sys.exit(0)

    signal(SIGINT, handler)
    signal(SIGTERM, handler)
    while True:
        print('update')
        update(url)
        print(f'sleeping for {sleep_time} seconds')
        sleep(sleep_time)


if __name__ == '__main__':
    main()