Compare commits

...

2 Commits

Author SHA1 Message Date
8c6b672d90 Adapt to new error message thrown by the site 2022-08-01 22:17:31 +02:00
e6be0ab494 Setting up setuptools 2022-08-01 22:16:49 +02:00
4 changed files with 27 additions and 4 deletions

View File

@ -1,3 +1,8 @@
# Requirements
`sudo apt install python3-pydantic python3-httpx python3-bs4 python3-lxml python3-tqdm locales-all && sudo locale-gen`
# Windows build
1. Install Wine.
2. Create a venv and run `pip install .` inside it.
3. `wine pyinstaller --paths=venv/lib/python3.10/site-packages/ --collect-submodules=lxml --onefile scraperoog/scrape.py`

16
pyproject.toml Normal file
View File

@ -0,0 +1,16 @@
[project]
name = "scraperoog"
version = "0.0.2"
dependencies = [
"tqdm",
"bs4",
"lxml",
"httpx",
"pydantic",
]
[tool.setuptools.packages]
find = {}
[project.scripts]
scraperoog = "scraperoog.scrape:main"

0
scraperoog/__init__.py Normal file
View File

View File

@ -81,7 +81,7 @@ def convert_to_datestring(day: str, month: str, year: str) -> datetime:
async def request_data(index: int, client: AsyncClient) -> Optional[Entry]:
response_data = await client.get(DATA_URL + str(index), timeout=20.0)
if "Fehler aufgetreten" not in response_data.text:
if "Die Darstellung ist derzeit deaktiviert" not in response_data.text:
response_title = await client.get(MAIN_URL + str(index), timeout=20.0)
title_soup = BeautifulSoup(response_title.text, "lxml")
apartment = (
@ -127,10 +127,12 @@ async def extract_results() -> None:
file.write(result.json())
generate_csv(result)
def main() -> None:
    """Run the scraper: pick the event loop policy, then drive `extract_results`.

    On Windows the asyncio event loop policy is switched to
    ``WindowsSelectorEventLoopPolicy`` before the coroutine is started; on
    every other platform the default policy is left untouched.
    """
    running_on_windows = platform.system() == "Windows"
    if running_on_windows:
        # NOTE(review): presumably chosen to avoid problems with the default
        # Windows loop when used with the HTTP client — confirm.
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)

    asyncio.run(extract_results())
if __name__ == "__main__":
# with open("results.json", "r") as file:
# result = Result(**json.load(file))
if platform.system() == "Windows":
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(extract_results())
main()