Merge pull request #17 from msaffitz/master
Retry file downloads on exception up to 10 times, skip already downloaded files.
This commit is contained in:
commit
bba95cb5fe
1 changed file with 31 additions and 8 deletions
39
exporter.py
39
exporter.py
|
@ -398,21 +398,44 @@ def parse_replies(threads, users):
|
||||||
return body
|
return body
|
||||||
|
|
||||||
|
|
||||||
def save_files(out_dir):
|
def download_file(destination_path, url, attempt=0):
    """Download ``url`` to ``destination_path`` unless that file already exists.

    Args:
        destination_path: Local path the downloaded bytes are stored at.
        url: Address fetched with ``requests.get`` using the module-level
            ``HEADERS``.
        attempt: Attempt number supplied by the caller; only used in the
            progress and error messages.

    Returns:
        True if the file already existed or was downloaded and written,
        False if any exception was raised while fetching or writing.
    """
    if os.path.exists(destination_path):
        print("Skipping existing %s" % destination_path)
        return True

    print(f"Downloading file on attempt {attempt} to {destination_path}")

    # Write to a sibling temp file and rename it into place only once it is
    # complete. Writing straight to destination_path would leave a truncated
    # file behind when the write fails, and the existence check above would
    # then make the next retry report success for that broken file.
    partial_path = destination_path + ".part"
    try:
        # NOTE(review): HTTP error statuses (4xx/5xx) are not rejected here,
        # so an error body is stored as the file content -- confirm whether
        # that is intended before adding response.raise_for_status().
        response = requests.get(url, headers=HEADERS)
        with open(partial_path, "wb") as fh:
            fh.write(response.content)
        os.replace(partial_path, destination_path)
    except Exception as err:
        # Deliberately broad: the caller retries on any failure.
        print(f"Unexpected error on {destination_path} attempt {attempt}; {err=}, {type(err)=}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up (or cleanup failed); the retry overwrites it.
            pass
        return False
    else:
        return True
|
||||||
|
|
||||||
|
def save_files(file_dir, max_attempts=10):
    """Download every file returned by ``get_file_list()`` into ``file_dir``.

    Each entry's ``name`` is sanitized in place and the file is stored as
    ``<id>-<name>`` inside ``file_dir``. A download is retried until
    ``download_file`` reports success or ``max_attempts`` tries are used up.

    Args:
        file_dir: Directory the files are written to; created on demand.
        max_attempts: Maximum number of download tries per file.

    Raises:
        Exception: If a file could not be downloaded within ``max_attempts``
            tries.
    """
    total = 0
    start = default_timer()

    for file_info in get_file_list():
        url = file_info["url_private"]
        file_info["name"] = sanitize_filename(file_info["name"])
        destination_filename = "{id}-{name}".format(**file_info)
        os.makedirs(file_dir, exist_ok=True)
        destination_path = os.path.join(file_dir, destination_filename)

        for attempt in range(1, max_attempts + 1):
            if download_file(destination_path, url, attempt):
                break
        else:
            # The loop ran out without a successful download. The message is
            # an f-string so the URL and try count are actually interpolated,
            # and it reports the number of tries made rather than a counter
            # that has already been advanced past the last try.
            raise Exception(f"Failed to download from {url} after {max_attempts} tries")

        total += 1

    end = default_timer()
    seconds = int(end - start)
    print("Downloaded %i files in %i seconds" % (total, seconds))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue