100 lines
2.1 KiB
Python
100 lines
2.1 KiB
Python
"""
Create data in city;yyyy-mm-dd HH:mm:ss.SSS;temp format.

Avg line length: ~36 bytes
Expected output file size: >3 GB -> 3 000 000 000 bytes

Needed line count: 3 000 000 000 / 36 = ~83.3 million

66 district cities * 75 years with measurement twice per hour
= 66 * 75 * 365 * 24 * 2 = 86 724 000 lines -> ~3.1 GB
"""
|
|
|
|
import datetime
|
|
import numpy as np
|
|
|
|
# The 66 Polish cities with powiat rights ("district cities"), in Polish
# alphabetical order. One block of rows is written per city, in this order.
# "Warszawa" was missing from the list (65 entries), although the module
# docstring and the random batch size are both computed for 66 cities.
# NOTE: random values are consumed sequentially, so restoring it shifts the
# values assigned to the cities listed after it compared with earlier runs.
cities = [
    "Biała Podlaska",
    "Białystok",
    "Bielsko-Biała",
    "Bydgoszcz",
    "Bytom",
    "Chełm",
    "Chorzów",
    "Częstochowa",
    "Dąbrowa Górnicza",
    "Elbląg",
    "Gdańsk",
    "Gdynia",
    "Gliwice",
    "Gorzów Wielkopolski",
    "Grudziądz",
    "Jastrzębie-Zdrój",
    "Jaworzno",
    "Jelenia Góra",
    "Kalisz",
    "Katowice",
    "Kielce",
    "Konin",
    "Koszalin",
    "Kraków",
    "Krosno",
    "Legnica",
    "Leszno",
    "Lublin",
    "Łomża",
    "Łódź",
    "Mysłowice",
    "Nowy Sącz",
    "Olsztyn",
    "Opole",
    "Ostrołęka",
    "Piekary Śląskie",
    "Piotrków Trybunalski",
    "Płock",
    "Poznań",
    "Przemyśl",
    "Radom",
    "Ruda Śląska",
    "Rybnik",
    "Rzeszów",
    "Siedlce",
    "Siemianowice Śląskie",
    "Skierniewice",
    "Słupsk",
    "Sopot",
    "Sosnowiec",
    "Suwałki",
    "Szczecin",
    "Świętochłowice",
    "Świnoujście",
    "Tarnobrzeg",
    "Tarnów",
    "Toruń",
    "Tychy",
    "Wałbrzych",
    "Warszawa",
    "Włocławek",
    "Wrocław",
    "Zabrze",
    "Zamość",
    "Zielona Góra",
    "Żory",
]
|
|
|
|
# Measurement period shared by every city: readings start at 1949-01-01 00:00:00.
begin_date = datetime.datetime(year=1949, month=1, day=1, hour=0, minute=0, second=0)
# 75 "years" of exactly 365 days each; leap days are not accounted for, so the
# (exclusive) end of the period is 2023-12-14 rather than 2024-01-01.
end_date = begin_date + datetime.timedelta(days=365 * 75)

# Number of half-hourly readings per city in [begin_date, end_date).
# Ceiling division, so a span that is not a whole multiple of 30 minutes is
# still fully covered. For the dates above this is 75 * 365 * 24 * 2.
samples_per_city = -(-(end_date - begin_date) // datetime.timedelta(minutes=30))

# Fixed seed so every run draws the same sequence of values.
generator = np.random.default_rng(790492283396)
# One integer in [-1500, 3500) per output row; the writer divides each by 100.
# The size is derived from the actual city list and date span instead of a
# hard-coded 66 * 75 * 365 * 24 * 2, so the batch always holds exactly as many
# values as the writer loop consumes.
batch = iter(generator.integers(low=-1500, high=3500, size=len(cities) * samples_per_city))
|
|
|
|
# Wall-clock start, used only for the "Completed in ..." report at the end.
start = datetime.datetime.now()

# Every city covers the same period, so the timestamp column is rendered once
# and reused for each city instead of redoing the datetime arithmetic and
# string conversion for every row. The 30-minute step is built once as well.
# This trades roughly 1.3M cached strings for far less per-row work.
step = datetime.timedelta(minutes=30)
timestamps = []
now = begin_date
while now < end_date:
    # str(datetime) gives "YYYY-MM-DD HH:MM:SS" because microseconds are zero;
    # the ".000" millisecond suffix is appended by the row format below.
    timestamps.append(str(now))
    now += step

with open('../data/temperatures.csv', 'w', encoding='utf-8') as target:
    for city in cities:
        print(city)  # progress indicator: one line per city
        for stamp in timestamps:
            # Row format: city;timestamp.000;value, where value is the next
            # random integer divided by 100.
            target.write("{};{}.000;{}\n".format(city, stamp, int(next(batch)) / 100.0))

end = datetime.datetime.now()
print("Completed in {}".format(end - start))
|