React to new error message

This commit is contained in:
Johannes Rothe 2022-10-31 19:07:50 +01:00
parent 8c6b672d90
commit d7b07d02b5
3 changed files with 15 additions and 11 deletions

View File

@ -3,6 +3,10 @@
`apt install python3-pydantic python3-httpx python3-bs4 python3-tqdm locales-all && sudo locale-gen` `apt install python3-pydantic python3-httpx python3-bs4 python3-tqdm locales-all && sudo locale-gen`
# Windows build # Windows build
1. install wine 1. Install wine
2. create venv and `pip install .` 2. `winecfg` and configure windows 10
3. `wine pyinstaller --paths=venv/lib/python3.10/site-packages/ --collect-submodules=lxml --onefile scraperoog/scrape.py` 3. Install [python](https://www.python.org/downloads/windows/)
4. Install pyinstaller in wine: `wine pip install pyinstaller`
5. Create a venv and run `pip install .`
6. `wine pyinstaller --paths=venv/lib/python3.10/site-packages/ --onefile scraperoog/scrape.py`
7. The resulting `.exe` is found under `dist/`

View File

@ -1,10 +1,9 @@
[project] [project]
name = "scraperoog" name = "scraperoog"
version = "0.0.2" version = "0.0.3"
dependencies = [ dependencies = [
"tqdm", "tqdm",
"bs4", "bs4",
"lxml",
"httpx", "httpx",
"pydantic", "pydantic",
] ]

View File

@ -1,10 +1,11 @@
import asyncio import asyncio
import csv import csv
import json
import locale import locale
locale.setlocale(locale.LC_TIME, "German") # dates on that page are German
import pickle
import platform import platform
if platform.system() == "Windows":
locale.setlocale(locale.LC_TIME, "German")
else:
locale.setlocale(locale.LC_TIME, "de_DE.utf_8")
from configparser import ConfigParser from configparser import ConfigParser
from datetime import datetime from datetime import datetime
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
@ -81,16 +82,16 @@ def convert_to_datestring(day: str, month: str, year: str) -> datetime:
async def request_data(index: int, client: AsyncClient) -> Optional[Entry]: async def request_data(index: int, client: AsyncClient) -> Optional[Entry]:
response_data = await client.get(DATA_URL + str(index), timeout=20.0) response_data = await client.get(DATA_URL + str(index), timeout=20.0)
if "Die Darstellung ist derzeit deaktiviert" not in response_data.text: if "Dieser Belegungskalender ist derzeit nicht aktiv." not in response_data.text:
response_title = await client.get(MAIN_URL + str(index), timeout=20.0) response_title = await client.get(MAIN_URL + str(index), timeout=20.0)
title_soup = BeautifulSoup(response_title.text, "lxml") title_soup = BeautifulSoup(response_title.text, "html.parser")
apartment = ( apartment = (
title_soup.body.header.h2.get_text() title_soup.body.header.h2.get_text()
.replace("\xa0", " ") .replace("\xa0", " ")
.replace("Wohneinheit: ", "") .replace("Wohneinheit: ", "")
) )
data_soup = BeautifulSoup(response_data.text, "lxml") data_soup = BeautifulSoup(response_data.text, "html.parser")
valid_element = data_soup.find_all("td", attrs={"data-daynum": True}) valid_element = data_soup.find_all("td", attrs={"data-daynum": True})
availabilities = [] availabilities = []
for elm in valid_element: for elm in valid_element: