"""
Create data in city;yyyy-mm-dd HH:mm:ss.SSS;temp format

Avg line length: ~36 bytes
Expected output file size: >3 GB -> 3 000 000 000 bytes

Needed lines count = ~83.(3)m

Planned volume: 66 district cities * 75 years with measurement twice per hour
    = 66*75*365*24*2 = 86 724 000 lines -> ~3.1 GB

Actual volume: the ``cities`` list below holds 65 entries, so the script emits
    65*75*365*24*2 = 85 410 000 lines -> ~3.07 GB

NOTE(review): Warszawa appears to be the entry missing from the list of 66 -
confirm whether the omission is intentional. It is deliberately NOT added here
because doing so would shift the seeded random stream and change every line
generated for the cities that follow it.

A "year" is a flat 365 days (leap days are ignored), so the generated range
ends 75*365 days after 1949-01-01 rather than exactly on 2024-01-01.
"""

import datetime

import numpy as np

cities = [
    "Biała Podlaska",
    "Białystok",
    "Bielsko-Biała",
    "Bydgoszcz",
    "Bytom",
    "Chełm",
    "Chorzów",
    "Częstochowa",
    "Dąbrowa Górnicza",
    "Elbląg",
    "Gdańsk",
    "Gdynia",
    "Gliwice",
    "Gorzów Wielkopolski",
    "Grudziądz",
    "Jastrzębie-Zdrój",
    "Jaworzno",
    "Jelenia Góra",
    "Kalisz",
    "Katowice",
    "Kielce",
    "Konin",
    "Koszalin",
    "Kraków",
    "Krosno",
    "Legnica",
    "Leszno",
    "Lublin",
    "Łomża",
    "Łódź",
    "Mysłowice",
    "Nowy Sącz",
    "Olsztyn",
    "Opole",
    "Ostrołęka",
    "Piekary Śląskie",
    "Piotrków Trybunalski",
    "Płock",
    "Poznań",
    "Przemyśl",
    "Radom",
    "Ruda Śląska",
    "Rybnik",
    "Rzeszów",
    "Siedlce",
    "Siemianowice Śląskie",
    "Skierniewice",
    "Słupsk",
    "Sopot",
    "Sosnowiec",
    "Suwałki",
    "Szczecin",
    "Świętochłowice",
    "Świnoujście",
    "Tarnobrzeg",
    "Tarnów",
    "Toruń",
    "Tychy",
    "Wałbrzych",
    "Włocławek",
    "Wrocław",
    "Zabrze",
    "Zamość",
    "Zielona Góra",
    "Żory",
]

begin_date = datetime.datetime(year=1949, month=1, day=1, hour=0, minute=0, second=0)
end_date = begin_date + datetime.timedelta(days=365 * 75)

# Hoisted so the timedelta is built once instead of once per generated line.
MEASUREMENT_INTERVAL = datetime.timedelta(minutes=30)

# Fixed seed: repeated runs produce the same file.
SEED = 790492283396

# Raw temperatures are drawn as integers in [RAW_TEMP_LOW, RAW_TEMP_HIGH) and
# divided by 100 when written, i.e. values from -15.0 up to 34.99.
RAW_TEMP_LOW = -1500
RAW_TEMP_HIGH = 3500

OUTPUT_PATH = '../data/temperatures.csv'


def count_samples(begin, end, step=MEASUREMENT_INTERVAL):
    """Return how many timestamps ``iter_timestamps(begin, end, step)`` yields.

    This is ceil((end - begin) / step), clamped at 0 for an empty or reversed
    range. Only timedelta floor division is used, so the result is an exact int.
    """
    # Floor-dividing the negative span and negating the result is a ceiling
    # division without going through floats.
    return max(0, -((begin - end) // step))


def iter_timestamps(begin, end, step=MEASUREMENT_INTERVAL):
    """Yield ``begin``, ``begin + step``, ... for every value strictly before ``end``."""
    current = begin
    while current < end:
        yield current
        current += step


def format_record(city, timestamp, raw_temp):
    """Return one output line: ``city;timestamp.000;temperature`` plus a newline.

    ``timestamp`` is rendered with ``str()``; the literal ``.000`` supplies the
    millisecond part, which relies on the timestamp carrying no microseconds
    (true for whole-minute steps from a whole-second start).
    ``raw_temp`` is an integer number of hundredths of a degree.
    """
    return "{};{}.000;{}\n".format(city, timestamp, int(raw_temp) / 100.0)


def city_lines(city, begin, end, raw_temps, step=MEASUREMENT_INTERVAL):
    """Yield the formatted lines for one city over ``[begin, end)``.

    Exactly one value is pulled from the ``raw_temps`` iterator per timestamp,
    so consecutive cities can share a single iterator without skipping values.

    Raises:
        ValueError: if ``raw_temps`` runs out before the time range is covered.
    """
    for timestamp in iter_timestamps(begin, end, step):
        try:
            raw_temp = next(raw_temps)
        except StopIteration:
            # A StopIteration escaping a generator would surface as an opaque
            # RuntimeError (PEP 479); report the real cause instead.
            raise ValueError(
                "ran out of random temperatures while generating {!r}".format(city)
            ) from None
        yield format_record(city, timestamp, raw_temp)


def main():
    """Generate the CSV at ``OUTPUT_PATH`` (relative to the working directory)."""
    # Size the batch from the real city list and time range rather than a
    # hard-coded city count; the values that get consumed are the same prefix
    # of the seeded stream, so the produced file is unchanged.
    total_samples = len(cities) * count_samples(begin_date, end_date)
    rng = np.random.default_rng(SEED)
    raw_temps = iter(rng.integers(low=RAW_TEMP_LOW, high=RAW_TEMP_HIGH, size=total_samples))

    started = datetime.datetime.now()
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as target:
        for city in cities:
            print(city)  # progress indicator: one line per city
            target.writelines(city_lines(city, begin_date, end_date, raw_temps))

    finished = datetime.datetime.now()
    print("Completed in {}".format(finished - started))


if __name__ == "__main__":
    main()