Apply PR #24 (optionally limit file export to a single channel) with fixes

see: https://github.com/sebseager/slack-exporter/pull/24
This commit is contained in:
Piotr Dec 2025-04-28 14:23:51 +02:00
parent ab73c63d7e
commit 325f1c4ed1
No known key found for this signature in database
GPG key ID: D3B5A5D0150D147A
2 changed files with 25 additions and 16 deletions

3
.gitignore vendored
View file

@ -4,3 +4,6 @@ __pycache__
.idea .idea
.vscode .vscode
.DS_Store .DS_Store
bin/
pyvenv.cfg
out/

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python3
import os import os
import sys import sys
import requests import requests
@ -19,8 +18,6 @@ if os.path.isfile(env_file):
# write handling # write handling
def post_response(response_url, text): def post_response(response_url, text):
requests.post(response_url, json={"text": text}) requests.post(response_url, json={"text": text})
@ -143,11 +140,15 @@ def channel_list(team_id=None, response_url=None):
) )
def get_file_list(): def get_file_list(channel_id=None):
current_page = 1 current_page = 1
total_pages = 1 total_pages = 1
while current_page <= total_pages: while current_page <= total_pages:
response = get_data("https://slack.com/api/files.list", params={"page": current_page}) params = {"page": current_page}
if channel_id:
# Add the channel_id parameter if specified
params["channel"] = channel_id
response = get_data("https://slack.com/api/files.list", params=params)
json_data = response.json() json_data = response.json()
total_pages = json_data["paging"]["pages"] total_pages = json_data["paging"]["pages"]
for file in json_data["files"]: for file in json_data["files"]:
@ -328,7 +329,8 @@ def parse_channel_history(msgs, users, check_thread=False):
if "messages" in msgs: if "messages" in msgs:
msgs = msgs["messages"] msgs = msgs["messages"]
messages = [x for x in msgs if x["type"] == "message"] # files are also messages messages = [x for x in msgs if x["type"] ==
"message"] # files are also messages
body = "" body = ""
for msg in messages: for msg in messages:
if "user" in msg: if "user" in msg:
@ -398,13 +400,13 @@ def parse_replies(threads, users):
return body return body
def download_file(destination_path, url, attempt = 0): def download_file(destination_path, url, attempt=0):
if os.path.exists(destination_path): if os.path.exists(destination_path):
print("Skipping existing %s" % destination_path) print("Skipping existing %s" % destination_path)
return True return True
print(f"Downloading file on attempt {attempt} to {destination_path}") print(f"Downloading file on attempt {attempt} to {destination_path}")
try: try:
response = requests.get(url, headers=HEADERS) response = requests.get(url, headers=HEADERS)
with open(destination_path, "wb") as fh: with open(destination_path, "wb") as fh:
@ -415,10 +417,10 @@ def download_file(destination_path, url, attempt = 0):
else: else:
return True return True
def save_files(file_dir): def save_files(file_dir, channel_id=None):
total = 0 total = 0
start = default_timer() start = default_timer()
for file_info in get_file_list(): for file_info in get_file_list(channel_id=channel_id):
url = file_info["url_private"] url = file_info["url_private"]
file_info["name"] = sanitize_filename(file_info["name"]) file_info["name"] = sanitize_filename(file_info["name"])
destination_filename = "{id}-{name}".format(**file_info) destination_filename = "{id}-{name}".format(**file_info)
@ -428,8 +430,8 @@ def save_files(file_dir):
download_success = False download_success = False
attempt = 1 attempt = 1
while not download_success and attempt <= 10: while not download_success and attempt <= 10:
download_success = download_file(destination_path, url, attempt) download_success = download_file(destination_path, url, attempt)
attempt += 1 attempt += 1
if not download_success: if not download_success:
raise Exception("Failed to download from {url} after {attempt} tries") raise Exception("Failed to download from {url} after {attempt} tries")
@ -535,9 +537,9 @@ if __name__ == "__main__":
ch_name, ch_type = name_from_ch_id(channel_id, channel_list) ch_name, ch_type = name_from_ch_id(channel_id, channel_list)
header_str = "%s Name: %s" % (ch_type, ch_name) header_str = "%s Name: %s" % (ch_type, ch_name)
data_ch = ( data_ch = (
"Channel ID: %s\n%s\n%s Messages\n%s\n\n" "Channel ID: %s\n%s\n%s Messages\n%s\n\n"
% (channel_id, header_str, len(channel_hist), sep_str) % (channel_id, header_str, len(channel_hist), sep_str)
+ data_ch + data_ch
) )
save(data_ch, "channel_%s" % channel_id) save(data_ch, "channel_%s" % channel_id)
if a.r: if a.r:
@ -568,4 +570,8 @@ if __name__ == "__main__":
save_replies(ch_hist, ch_id, ch_list, user_list) save_replies(ch_hist, ch_id, ch_list, user_list)
if a.files and a.o is not None: if a.files and a.o is not None:
save_files(out_dir) if a.ch:
ch_id = a.ch
save_files(out_dir, channel_id=ch_id)
else:
save_files(out_dir)