48 lines
1.4 KiB
Python
48 lines
1.4 KiB
Python
import asyncio
|
|
import pickle
|
|
from httpx import AsyncClient
|
|
from typing import Tuple
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Calendar page whose <header> carries the house (h1) and unit (h2) titles;
# the apartment id is appended to the query string.
MAIN_URL = "https://www.spiekeroog-vermieter.de/suche/monatskalenderSite.htm?wohnids="

# Calendar data endpoint probed first to decide whether an id exists;
# the apartment id is appended to the query string.
DATA_URL = "https://www.spiekeroog-vermieter.de/suche/monatskalender.htm?wohnids="

# Id range to scan, used as range(FROM, TO): FROM is included, TO is not.
FROM = 0
TO = 2000
|
|
|
|
|
|
async def request_async(index: int, client: AsyncClient) -> Tuple[int, str]:
    """Probe one apartment id and, if it exists, fetch its display name.

    The calendar data page for ``index`` is requested first. Unless its
    body contains the marker text "Fehler aufgetreten", the main calendar
    page is requested as well and a name is built from its header.

    Args:
        index: Apartment id appended to ``DATA_URL`` and ``MAIN_URL``.
        client: Shared ``AsyncClient`` used for both requests.

    Returns:
        ``(index, "<h1 text> - <h2 text>")`` for a valid id, otherwise the
        sentinel ``(0, "")``. The sentinel is also returned when the main
        page lacks the expected ``body > header > h1/h2`` markup.
        Because the sentinel index is 0, callers that may probe id 0
        should test the (always non-empty) name rather than the index.

    Raises:
        Whatever ``client.get`` raises (e.g. transport or timeout errors);
        these are deliberately not swallowed here.
    """
    invalid = (0, "")

    response_data = await client.get(DATA_URL + str(index), timeout=20.0)
    if "Fehler aufgetreten" in response_data.text:
        return invalid

    response_title = await client.get(MAIN_URL + str(index), timeout=20.0)
    soup = BeautifulSoup(response_title.text, "lxml")

    # BeautifulSoup yields None for a missing tag, so walk the chain
    # defensively: one malformed page must not raise AttributeError and
    # abort every other probe gathered alongside this one.
    body = soup.body
    header = body.header if body is not None else None
    if header is None or header.h1 is None or header.h2 is None:
        return invalid

    # h1 contains the house name, h2 the apartment
    house_name = header.h1.get_text()
    # Non-breaking spaces in the unit title are normalised to plain spaces.
    unit_name = header.h2.get_text().replace("\xa0", " ")
    return index, f"{house_name} - {unit_name}"
|
|
|
|
|
|
async def get_valid_ids(max_concurrency: int = 50):
    """Scan ``range(FROM, TO)`` for valid ids and pickle them to ``valid_ids``.

    Every id in the range is probed with ``request_async``. The resulting
    ``{id: name}`` mapping of valid ids is printed and then written with
    ``pickle`` to the file ``valid_ids`` in the current working directory.

    Args:
        max_concurrency: Maximum number of probes in flight at once.
            Must be at least 1.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
        Any exception raised by a probe propagates; the HTTP client is
        closed first and no file is written in that case.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    # Launching every probe at once lets requests queue on httpx's bounded
    # connection pool, where the per-request timeout can expire while
    # merely waiting for a free connection. Cap the fan-out instead.
    limiter = asyncio.Semaphore(max_concurrency)

    async def probe(index, client):
        async with limiter:
            return await request_async(index, client)

    # ``async with`` guarantees the client is closed even if a probe raises.
    async with AsyncClient() as client:
        results = await asyncio.gather(
            *(probe(i, client) for i in range(FROM, TO))
        )

    # Invalid probes come back as (0, ""). Filter on the name instead of the
    # index, and do it before building the dict: otherwise a valid id 0
    # shares its key with the sentinel and is overwritten or discarded.
    valid = {index: name for index, name in results if name}
    print(f"Valid ids: {valid}")

    with open("valid_ids", "wb") as file:
        pickle.dump(valid, file)
|
|
|
|
|
|
# Script entry point: run the id scan once when the file is executed directly.
if __name__ == "__main__":
    # A previous result can be inspected with:
    #     print(pickle.load(open("valid_ids", "rb")))
    scan = get_valid_ids()
    asyncio.run(scan)
|