chore: Python test data generator
This commit is contained in:
parent
12ca1369f3
commit
625dd63137
1 changed files with 100 additions and 0 deletions
100
util/test-generator.py
Normal file
100
util/test-generator.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
"""
|
||||
Create data in city;yyyy-mm-dd HH:mm:ss.SSS;temp format
|
||||
|
||||
Avg line length: ~36 bytes
|
||||
Expected output file size: >3 GB -> 3 000 000 000 bytes
|
||||
|
||||
Needed line count: ~83.(3) million
|
||||
|
||||
66 district cities * 75 years (365 days each, leap days ignored) with measurements twice per hour = 66*75*365*24*2 = 86 724 000 lines -> ~3.1 GB
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import numpy as np
|
||||
|
||||
# District cities used as measurement locations, in Polish alphabetical order.
# The module docstring's size estimate assumes 66 entries; keep the two in sync
# when editing this list.
cities = [
    "Biała Podlaska",
    "Białystok",
    "Bielsko-Biała",
    "Bydgoszcz",
    "Bytom",
    "Chełm",
    "Chorzów",
    "Częstochowa",
    "Dąbrowa Górnicza",
    "Elbląg",
    "Gdańsk",
    "Gdynia",
    "Gliwice",
    "Gorzów Wielkopolski",
    "Grudziądz",
    "Jastrzębie-Zdrój",
    "Jaworzno",
    "Jelenia Góra",
    "Kalisz",
    "Katowice",
    "Kielce",
    "Konin",
    "Koszalin",
    "Kraków",
    "Krosno",
    "Legnica",
    "Leszno",
    "Lublin",
    "Łomża",
    "Łódź",
    "Mysłowice",
    "Nowy Sącz",
    "Olsztyn",
    "Opole",
    "Ostrołęka",
    "Piekary Śląskie",
    "Piotrków Trybunalski",
    "Płock",
    "Poznań",
    "Przemyśl",
    "Radom",
    "Ruda Śląska",
    "Rybnik",
    "Rzeszów",
    "Siedlce",
    "Siemianowice Śląskie",
    "Skierniewice",
    "Słupsk",
    "Sopot",
    "Sosnowiec",
    "Suwałki",
    "Szczecin",
    "Świętochłowice",
    "Świnoujście",
    "Tarnobrzeg",
    "Tarnów",
    "Toruń",
    "Tychy",
    "Wałbrzych",
    # NOTE(review): added so the list reaches the 66 entries the sizing maths
    # assumes -- confirm the omission was not deliberate.
    "Warszawa",
    "Włocławek",
    "Wrocław",
    "Zabrze",
    "Zamość",
    "Zielona Góra",
    "Żory",
]
|
||||
|
||||
begin_date = datetime.datetime(year=1949, month=1, day=1, hour=0, minute=0, second=0)
# 75 "years" of exactly 365 days each: leap days are not added, which keeps the
# row count equal to the 75*365*24*2 per-city figure in the module docstring.
end_date = begin_date + datetime.timedelta(days=365 * 75)


def _rows_per_city(first, stop, step):
    """Return how many timestamps first, first + step, ... lie strictly before stop."""
    span = stop - first
    if span <= datetime.timedelta(0):
        return 0
    # Ceiling division on timedeltas: a partial trailing step still gets a row,
    # matching a `while now < stop` loop.
    return -((-span) // step)


def write_measurements(target, city_names, first, stop, step, readings):
    """Write one `city;timestamp.000;temp` row per city and time step.

    Args:
        target: Writable text stream receiving the rows.
        city_names: Iterable of city names; each gets the full time range.
        first: First timestamp (inclusive) written for every city.
        stop: End of the range (exclusive).
        step: Positive ``timedelta`` between consecutive rows of one city.
        readings: Iterator of integer hundredths; each row consumes one value
            and writes it as ``value / 100.0``.

    Returns:
        The number of rows written.

    Raises:
        StopIteration: If ``readings`` runs out before all rows are written.
    """
    write = target.write  # bound once; the loop below runs tens of millions of times
    written = 0
    for city in city_names:
        print(city)  # progress indicator, one line per city
        now = first
        while now < stop:
            # str(datetime) has no fractional part when microsecond == 0, so the
            # literal ".000" supplies the milliseconds field.
            write("{};{}.000;{}\n".format(city, now, int(next(readings)) / 100.0))
            now += step
            written += 1
    return written


def main():
    """Generate ../data/temperatures.csv and print the elapsed wall-clock time."""
    step = datetime.timedelta(minutes=30)
    # Size the random batch from the actual inputs instead of a hard-coded
    # 66*75*365*24*2, so it can neither run dry nor over-allocate when the city
    # list or the date range changes.
    total_rows = len(cities) * _rows_per_city(begin_date, end_date, step)

    # Fixed seed keeps the generated file reproducible between runs.
    generator = np.random.default_rng(790492283396)
    # Integer hundredths in [-1500, 3500), i.e. -15.00 .. 34.99 once divided by 100.
    batch = iter(generator.integers(low=-1500, high=3500, size=total_rows))

    start = datetime.datetime.now()
    with open('../data/temperatures.csv', 'w', encoding='utf-8') as target:
        write_measurements(target, cities, begin_date, end_date, step, batch)
    end = datetime.datetime.now()
    print("Completed in {}".format(end - start))


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue