Apply upstream PR #24 (optional per-channel file export) with fixes

see: https://github.com/sebseager/slack-exporter/pull/24
This commit is contained in:
Piotr Dec 2025-04-28 14:23:51 +02:00
parent ab73c63d7e
commit 325f1c4ed1
No known key found for this signature in database
GPG key ID: D3B5A5D0150D147A
2 changed files with 25 additions and 16 deletions

3
.gitignore vendored
View file

@ -4,3 +4,6 @@ __pycache__
.idea
.vscode
.DS_Store
bin/
pyvenv.cfg
out/

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python3
import os
import sys
import requests
@ -19,8 +18,6 @@ if os.path.isfile(env_file):
# write handling
def post_response(response_url, text):
requests.post(response_url, json={"text": text})
@ -143,11 +140,15 @@ def channel_list(team_id=None, response_url=None):
)
def get_file_list():
def get_file_list(channel_id=None):
current_page = 1
total_pages = 1
while current_page <= total_pages:
response = get_data("https://slack.com/api/files.list", params={"page": current_page})
params = {"page": current_page}
if channel_id:
# Add the channel_id parameter if specified
params["channel"] = channel_id
response = get_data("https://slack.com/api/files.list", params=params)
json_data = response.json()
total_pages = json_data["paging"]["pages"]
for file in json_data["files"]:
@ -328,7 +329,8 @@ def parse_channel_history(msgs, users, check_thread=False):
if "messages" in msgs:
msgs = msgs["messages"]
messages = [x for x in msgs if x["type"] == "message"] # files are also messages
messages = [x for x in msgs if x["type"] ==
"message"] # files are also messages
body = ""
for msg in messages:
if "user" in msg:
@ -398,13 +400,13 @@ def parse_replies(threads, users):
return body
def download_file(destination_path, url, attempt = 0):
def download_file(destination_path, url, attempt=0):
if os.path.exists(destination_path):
print("Skipping existing %s" % destination_path)
return True
print(f"Downloading file on attempt {attempt} to {destination_path}")
try:
response = requests.get(url, headers=HEADERS)
with open(destination_path, "wb") as fh:
@ -415,10 +417,10 @@ def download_file(destination_path, url, attempt = 0):
else:
return True
def save_files(file_dir):
def save_files(file_dir, channel_id=None):
total = 0
start = default_timer()
for file_info in get_file_list():
for file_info in get_file_list(channel_id=channel_id):
url = file_info["url_private"]
file_info["name"] = sanitize_filename(file_info["name"])
destination_filename = "{id}-{name}".format(**file_info)
@ -428,8 +430,8 @@ def save_files(file_dir):
download_success = False
attempt = 1
while not download_success and attempt <= 10:
download_success = download_file(destination_path, url, attempt)
attempt += 1
download_success = download_file(destination_path, url, attempt)
attempt += 1
if not download_success:
raise Exception("Failed to download from {url} after {attempt} tries")
@ -535,9 +537,9 @@ if __name__ == "__main__":
ch_name, ch_type = name_from_ch_id(channel_id, channel_list)
header_str = "%s Name: %s" % (ch_type, ch_name)
data_ch = (
"Channel ID: %s\n%s\n%s Messages\n%s\n\n"
% (channel_id, header_str, len(channel_hist), sep_str)
+ data_ch
"Channel ID: %s\n%s\n%s Messages\n%s\n\n"
% (channel_id, header_str, len(channel_hist), sep_str)
+ data_ch
)
save(data_ch, "channel_%s" % channel_id)
if a.r:
@ -568,4 +570,8 @@ if __name__ == "__main__":
save_replies(ch_hist, ch_id, ch_list, user_list)
if a.files and a.o is not None:
save_files(out_dir)
if a.ch:
ch_id = a.ch
save_files(out_dir, channel_id=ch_id)
else:
save_files(out_dir)