Add config, make Windows-compatible

This commit is contained in:
Johannes Rothe 2022-01-16 22:15:19 +01:00
parent 11dd02d3c7
commit 50c344c583
2 changed files with 48 additions and 26 deletions

5
config.ini Normal file
View File

@ -0,0 +1,5 @@
[Allgemein]
haupt_url = https://www.spiekeroog-vermieter.de/suche/monatskalenderSite.htm?wohnids=
data_url = https://www.spiekeroog-vermieter.de/suche/monatskalender.htm?wohnids=
von_id = 200
bis_id = 300

View File

@ -3,21 +3,26 @@ import csv
import json import json
import locale import locale
import pickle import pickle
import platform
from configparser import ConfigParser
from datetime import datetime from datetime import datetime
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
if platform.system() == "Windows":
import encodings.idna
import tqdm
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from httpx import AsyncClient from httpx import AsyncClient
from pydantic import BaseModel from pydantic import BaseModel
# Runtime settings are read from ``config.ini`` (section ``[Allgemein]``),
# resolved relative to the current working directory.
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file is missing.
if not config.read("config.ini"):
    raise FileNotFoundError(
        "config.ini not found in the current working directory"
    )

# Index the options directly (and use the parser-level getint) so that a
# missing option raises right here instead of yielding None and failing later.
# NOTE(review): both URLs end in "wohnids=", so presumably an id is appended
# by the request code -- confirm against request_data.
MAIN_URL = config["Allgemein"]["haupt_url"]
DATA_URL = config["Allgemein"]["data_url"]
# Bounds of the id range that is scraped; used as range(FROM, TO), i.e. TO is
# exclusive.
FROM = config.getint("Allgemein", "von_id")
TO = config.getint("Allgemein", "bis_id")

# Format of the per-day column names in the generated CSV.
DATEFORMAT = "%Y-%m-%d"
# NOTE(review): keys look like calendar-cell class names and values like an
# occupancy fraction (0 / 0.5 / 1) -- confirm against the parsing code.
STATUS_MAPPING = {"DayF": 0, "DayB": 1, "DayFB": 0.5, "DayBF": 0.5}
@ -31,6 +36,17 @@ class Entry(BaseModel):
haus: Optional[str] haus: Optional[str]
wohneinheit: Optional[str] wohneinheit: Optional[str]
availabilities: Optional[List[Availability]] availabilities: Optional[List[Availability]]
def __lt__(self, other):
    """Return whether this entry's ``index`` is smaller than ``other``'s.

    Returns ``NotImplemented`` for operands that are not ``Entry`` objects so
    Python can try the reflected comparison or raise ``TypeError``, instead
    of failing with an ``AttributeError`` on ``other.index``.
    """
    if not isinstance(other, Entry):
        return NotImplemented
    return self.index < other.index
def __le__(self, other):
    """Return whether this entry's ``index`` is at most ``other``'s.

    Returns ``NotImplemented`` for operands that are not ``Entry`` objects so
    Python can try the reflected comparison or raise ``TypeError``, instead
    of failing with an ``AttributeError`` on ``other.index``.
    """
    if not isinstance(other, Entry):
        return NotImplemented
    return self.index <= other.index
def __gt__(self, other):
    """Return whether this entry's ``index`` is greater than ``other``'s.

    Returns ``NotImplemented`` for operands that are not ``Entry`` objects so
    Python can try the reflected comparison or raise ``TypeError``, instead
    of failing with an ``AttributeError`` on ``other.index``.
    """
    if not isinstance(other, Entry):
        return NotImplemented
    return self.index > other.index
def __ge__(self, other):
    """Return whether this entry's ``index`` is at least ``other``'s.

    Returns ``NotImplemented`` for operands that are not ``Entry`` objects so
    Python can try the reflected comparison or raise ``TypeError``, instead
    of failing with an ``AttributeError`` on ``other.index``.
    """
    if not isinstance(other, Entry):
        return NotImplemented
    return self.index >= other.index
class Result(BaseModel): class Result(BaseModel):
@ -38,14 +54,11 @@ class Result(BaseModel):
def generate_csv(result: Result) -> None: def generate_csv(result: Result) -> None:
with open("result.csv", "w") as csvfile: with open("result.csv", "w", newline='') as csvfile:
fieldnames = list(Entry.schema()["properties"].keys()) fieldnames = list(Entry.schema()["properties"].keys())
fieldnames.remove("availabilities") fieldnames.remove("availabilities")
fieldnames.extend( fieldnames.extend(
[ [a.date.strftime(DATEFORMAT) for a in result.entries[0].availabilities]
a.date.strftime("%Y-%m-%d")
for a in result.entries[0].availabilities
]
) )
csvwriter = csv.DictWriter(csvfile, fieldnames=fieldnames) csvwriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
csvwriter.writeheader() csvwriter.writeheader()
@ -56,12 +69,12 @@ def generate_csv(result: Result) -> None:
"wohneinheit": entry.wohneinheit, "wohneinheit": entry.wohneinheit,
} }
for avail in entry.availabilities: for avail in entry.availabilities:
row_content[avail.date.strftime("%Y-%m-%d")] = avail.status row_content[avail.date.strftime(DATEFORMAT)] = avail.status
csvwriter.writerow(row_content) csvwriter.writerow(row_content)
# German month names mapped to month numbers, keyed by their casefolded form.
# Parsing through this table does not depend on the process locale.
_GERMAN_MONTHS = {
    "januar": 1,
    "februar": 2,
    "märz": 3,
    "april": 4,
    "mai": 5,
    "juni": 6,
    "juli": 7,
    "august": 8,
    "september": 9,
    "oktober": 10,
    "november": 11,
    "dezember": 12,
}


def convert_to_datestring(day: str, month: str, year: str) -> datetime:
    """Build a ``datetime`` from a day, a month name and a year given as text.

    Despite its name, the function returns a ``datetime`` object, not a
    string.

    German month names (any letter case) are resolved through a fixed table,
    so they parse the same way regardless of the machine's locale. Any other
    month name falls back to ``strptime`` with the user's default locale.

    Args:
        day: Day of the month, e.g. ``"5"`` or ``"05"``.
        month: Full month name, e.g. ``"März"``.
        year: Year, e.g. ``"2022"``.

    Returns:
        The corresponding ``datetime`` at midnight.

    Raises:
        ValueError: If the parts do not form a valid date or the month name
            is not recognised.
    """
    month_number = _GERMAN_MONTHS.get(month.strip().casefold())
    if month_number is not None:
        return datetime(int(year), month_number, int(day))

    # Fallback for month names outside the table: let strptime interpret
    # ``%B`` using the user's default locale.
    locale.setlocale(locale.LC_TIME, "")
    return datetime.strptime(f"{day.zfill(2)} {month} {year}", "%d %B %Y")
@ -91,8 +104,8 @@ async def request_data(index: int, client: AsyncClient) -> Optional[Entry]:
availabilities.append(Availability(date=date, status=status)) availabilities.append(Availability(date=date, status=status))
return Entry( return Entry(
index=index, index=index,
haus=title_soup.body.header.h1.get_text(), haus=title_soup.body.header.h1.get_text().encode("utf-8"),
wohneinheit=apartment, wohneinheit=apartment.encode("utf-8"),
availabilities=availabilities, availabilities=availabilities,
) )
else: else:
@ -101,19 +114,23 @@ async def request_data(index: int, client: AsyncClient) -> Optional[Entry]:
async def extract_results() -> None:
    """Fetch all ids in ``range(FROM, TO)`` and write the collected results.

    The requests run concurrently with a progress bar. The surviving entries
    are sorted by ``index``, dumped as JSON to ``results.json`` and then
    handed to ``generate_csv``.
    """
    # ``async with`` closes the HTTP client even when a request raises.
    async with AsyncClient() as client:
        tasks = [request_data(i, client) for i in range(FROM, TO)]
        entries = [
            await finished
            for finished in tqdm.tqdm(
                asyncio.as_completed(tasks), total=len(tasks)
            )
        ]

    # request_data is annotated Optional[Entry], so guard against None before
    # reading ``index``.
    # NOTE(review): index 0 is presumably the placeholder for ids without
    # data -- confirm against request_data.
    valid_entries = [
        entry for entry in entries if entry is not None and entry.index != 0
    ]
    # as_completed yields results in completion order; restore id order.
    valid_entries.sort(key=lambda entry: entry.index)

    result = Result(entries=valid_entries)
    with open("results.json", "w") as file:
        file.write(result.json())
    generate_csv(result)
if __name__ == "__main__":
    # Development note: to rebuild the CSV from an existing results.json
    # without fetching again, load it with Result(**json.load(file)) and pass
    # the result to generate_csv().
    running_on_windows = platform.system() == "Windows"
    if running_on_windows:
        # Switch to the selector event loop policy on Windows before the
        # loop is created (part of the commit's Windows compatibility work).
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(extract_results())