From d7b07d02b531496fcbdd6c605aba02cab124d82c Mon Sep 17 00:00:00 2001 From: Johannes Rothe Date: Mon, 31 Oct 2022 19:07:50 +0100 Subject: [PATCH] React to new error message --- README.md | 10 +++++++--- pyproject.toml | 3 +-- scraperoog/scrape.py | 13 +++++++------ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 7c142ba..ff02164 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ `apt install python3-pydantic python3-httpx python3-bs4 python3-tqdm locales-all && sudo locale-gen` # Windows build -1. install wine -2. create venv and `pip install .` -3. `wine pyinstaller --paths=venv/lib/python3.10/site-packages/ --collect-submodules=lxml --onefile scraperoog/scrape.py` +1. Install wine +2. `winecfg` and configure windows 10 +3. Install [python](https://www.python.org/downloads/windows/) +4. Install pyinstaller in wine `wine pip install pyinstaller` +5. Create venv and `pip install .` +6. `wine pyinstaller --paths=venv/lib/python3.10/site-packages/ --onefile scraperoog/scrape.py` +7. .exe is found under dist/ diff --git a/pyproject.toml b/pyproject.toml index b62654b..5ec747d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,9 @@ [project] name = "scraperoog" -version = "0.0.2" +version = "0.0.3" dependencies = [ "tqdm", "bs4", - "lxml", "httpx", "pydantic", ] diff --git a/scraperoog/scrape.py b/scraperoog/scrape.py index 46fe185..a9989f9 100644 --- a/scraperoog/scrape.py +++ b/scraperoog/scrape.py @@ -1,10 +1,11 @@ import asyncio import csv -import json import locale -locale.setlocale(locale.LC_TIME, "German") # dates on that page are German -import pickle import platform +if platform.system() == "Windows": + locale.setlocale(locale.LC_TIME, "German") +else: + locale.setlocale(locale.LC_TIME, "de_DE.utf_8") from configparser import ConfigParser from datetime import datetime from typing import List, Optional, Tuple @@ -81,16 +82,16 @@ def convert_to_datestring(day: str, month: str, year: str) -> datetime: async def request_data(index: int, client: AsyncClient) -> Optional[Entry]: response_data = await client.get(DATA_URL + str(index), timeout=20.0) - if "Die Darstellung ist derzeit deaktiviert" not in response_data.text: + if "Dieser Belegungskalender ist derzeit nicht aktiv." not in response_data.text: response_title = await client.get(MAIN_URL + str(index), timeout=20.0) - title_soup = BeautifulSoup(response_title.text, "lxml") + title_soup = BeautifulSoup(response_title.text, "html.parser") apartment = ( title_soup.body.header.h2.get_text() .replace("\xa0", " ") .replace("Wohneinheit: ", "") ) - data_soup = BeautifulSoup(response_data.text, "lxml") + data_soup = BeautifulSoup(response_data.text, "html.parser") valid_element = data_soup.find_all("td", attrs={"data-daynum": True}) availabilities = [] for elm in valid_element: