"""Scan spiekeroog-vermieter.de listing ids and pickle the valid ones.

For every id in the half-open range [FROM, TO) the monthly-calendar
endpoint is queried; ids whose page does not report an error
("Fehler aufgetreten") are considered valid, and the house/apartment
name is scraped from the corresponding title page.  The resulting
``{id: name}`` mapping is pickled to the file ``valid_ids``.
"""

import asyncio
import pickle
from typing import Optional, Tuple

from bs4 import BeautifulSoup
from httpx import AsyncClient, HTTPError

MAIN_URL = (
    "https://www.spiekeroog-vermieter.de/suche/monatskalenderSite.htm?wohnids="
)
DATA_URL = (
    "https://www.spiekeroog-vermieter.de/suche/monatskalender.htm?wohnids="
)

# Id range to scan (FROM inclusive, TO exclusive).
FROM = 0
TO = 2000

# Cap on simultaneous connections: firing all 2000 requests at once
# would exhaust local sockets and likely trip server-side throttling.
MAX_CONCURRENCY = 50


async def request_async(index: int, client: AsyncClient) -> Optional[Tuple]:
    """Probe a single listing id.

    Returns ``(index, "House - Apartment")`` when the id is valid, or
    ``None`` when the calendar page reports an error, the request
    fails, or the title page lacks the expected markup.

    NOTE: ``None`` (not the former ``(0, "")`` sentinel) marks a miss,
    so id 0 itself can still be reported as valid.
    """
    try:
        response_data = await client.get(DATA_URL + str(index), timeout=20.0)
        if "Fehler aufgetreten" in response_data.text:
            return None
        response_title = await client.get(MAIN_URL + str(index), timeout=20.0)
    except HTTPError:
        # One flaky request must not abort the whole 2000-id scan.
        return None

    soup = BeautifulSoup(response_title.text, "lxml")
    try:
        # h1 contains the house name, h2 the apartment.
        apartment = soup.body.header.h1.get_text()
        unit = soup.body.header.h2.get_text().replace(u'\xa0', u' ')
    except AttributeError:
        # Page exists but does not have the expected structure.
        return None
    return index, f"{apartment} - {unit}"


async def get_valid_ids() -> None:
    """Scan all ids in [FROM, TO) and pickle ``{id: name}`` to ``valid_ids``."""
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)

    # Context manager guarantees the client is closed even if gather raises.
    async with AsyncClient() as client:

        async def probe(index: int) -> Optional[Tuple]:
            # Bound concurrency so gather() does not open every socket at once.
            async with semaphore:
                return await request_async(index, client)

        results = await asyncio.gather(
            *(probe(i) for i in range(FROM, TO))
        )

    valid = dict(entry for entry in results if entry is not None)
    print(f"Valid ids: {valid}")
    with open("valid_ids", "wb") as file:
        pickle.dump(valid, file)


if __name__ == "__main__":
    # print(pickle.load(open("valid_ids", "rb")))
    asyncio.run(get_valid_ids())