diff --git a/.gitignore b/.gitignore index f10862a..5680e72 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -/.env +.env +exports \ No newline at end of file diff --git a/README.md b/README.md index 9db26ee..b3feca8 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,62 @@ # slack-exporter -A Slack app for exporting messages and file attachments from public and private channels. +A Slack bot and standalone script for exporting messages and file attachments from public and private channels, using Slack's new Conversations API. -Note that Slack provides a similar service for workspace admins at [https://my.slack.com/services/export](https://my.slack.com/services/export) (where `my` can be replaced with your full workspace name to access workspace different than your default). However, it can only access public channels, while `slack-exporter` can be added to any channel. +A similar service is provided by Slack for workspace admins at [https://my.slack.com/services/export](https://my.slack.com/services/export) (where `my` can be replaced with your full workspace name to refer to a workspace different than your default). However, it can only access public channels, while `slack-exporter` can retrieve data from any channel accessible to your user account. + +## Authentication with Slack + +There are two ways to use `slack-exporter` (detailed below). Both require a Slack API token to be able to communicate with your workspace. + +1. Visit [https://api.slack.com/apps/](https://api.slack.com/apps/) and sign in to your workspace. +2. Click `Create New App`, enter a name (e.g., `Slack Exporter`), and select your workspace. +3. In the left-hand panel, navigate to `OAuth & Permissions`, and scroll to `User Token Scopes` (**not** `Bot Token Scopes`). +4. Select the following permissions: + - `channels:read`, `channels:history` + - `groups:read`, `groups:history` + - `mpim:read`, `mpim:history` + - `im:read`, `im:history` + - `users:read` +5. 
Select `Install to Workspace` at the top of that page (or `Reinstall to Workspace` if you have done this previously) and accept at the prompt. +6. Copy the `OAuth Access Token` (which will generally start with `xoxp` for user-level permissions) + +## Usage + +### As a standalone script + +`exporter.py` can create an archive of all conversation history in your workspace which is accessible to your user account. + +1. Run the following (replacing the value with the user token you obtained in the [Authentication with Slack](#authentication-with-slack) section above). + + ```shell script + export SLACK_USER_TOKEN=xoxp-xxxxxxxxxxxxx... + ``` + +2. Run `python exporter.py --help` to view the available export options. + +### As a Slack bot + +`bot.py` is a Slack bot that responds to "slash commands" in Slack channels (e.g., `/export-channel`). To connect the bot to the Slack app generated in [Authentication with Slack](#authentication-with-slack), create a file named `.env` in the root directory of this repo, and add the following line: + +```text +SLACK_USER_TOKEN = xoxp-xxxxxxxxxxxxx... +``` + +Save this file and run the Flask application in `bot.py` such that the application is exposed to the Internet. This can be done via a web server (e.g., Heroku), as well as via the ngrok service, which assigns your `localhost` server a public URL. + +To use the ngrok method: + +1. [Download](https://ngrok.com/download) the appropriate binary. +2. Run `python bot.py` +3. Run the ngrok binary with `path/to/ngrok http 5000`, where `5000` is the port on which the Flask application (step 2) is running. Copy the forwarding HTTPS address provided. + +Return to the Slack app you created in [Authentication with Slack](#authentication-with-slack) and navigate to the `Slash Commands` page in the sidebar. 
Create the following slash commands (one for each applicable Flask route in `bot.py`): + +| Command | Request URL | Arguments | Example Usage | +|-----------------|-------------------------------------------|--------------|----------------------| +| /export-channel | https://`[host_url]`/slack/export-channel | json \| text | /export-channel text | +| /export-replies | https://`[host_url]`/slack/export-replies | json \| text | /export-replies json | + +where, if using ngrok, `[host_url]` would be replaced with something like `xxxxxxxxxxxx.ngrok.io`. + +Navigate back to `OAuth & Permissions` and click `(Re)install to Workspace` to add these slash commands to the workspace. diff --git a/bot.py b/bot.py index f628c8b..5f00959 100644 --- a/bot.py +++ b/bot.py @@ -1,42 +1,18 @@ import os -import ssl -import slack -from slack.errors import SlackApiError import requests -from dotenv import load_dotenv from flask import Flask, request, Response from urllib.parse import urljoin from uuid import uuid4 import json -from datetime import datetime - -ssl_context = ssl.create_default_context() -ssl_context.check_hostname = False -ssl_context.verify_mode = ssl.CERT_NONE +from dotenv import load_dotenv +from exporter import parse_replies, parse_channel_history app = Flask(__name__) load_dotenv(os.path.join(app.root_path, '.env')) -client = slack.WebClient(token=os.environ['SLACK_BOT_TOKEN'], ssl=ssl_context) # chat write interactions -def send_to_channel(channel_id, text): - try: - client.chat_postMessage(channel=channel_id, text=text) - except SlackApiError as e: - print(e) - pass - - -def send_to_user(user_id, text): - try: - client.chat_postMessage(channel=user_id, text=text, as_user=True) - except SlackApiError as e: - print(e) - pass - - def post_response(response_url, text): requests.post(response_url, json={'text': text}) @@ -72,7 +48,10 @@ def paginated_get(url, params, response_url, combine_key=None): result = [] while True: next_cursor, data = get_at_cursor(url,
params, response_url, cursor=next_cursor) - result.extend(data) if combine_key is None else result.extend(data[combine_key]) + try: + result.extend(data) if combine_key is None else result.extend(data[combine_key]) + except KeyError: + post_response(response_url, "Sorry! I got an unexpected response (KeyError).") if next_cursor is None: break @@ -101,37 +80,19 @@ def user_list(team_id, response_url): return paginated_get('https://slack.com/api/users.list', params, response_url, combine_key='members') -# parsing +def channel_replies(timestamps, channel_id, response_url): + replies = [] + for timestamp in timestamps: + params = { + 'token': os.environ['SLACK_USER_TOKEN'], + 'channel': channel_id, + 'ts': timestamp, + 'limit': 200 + } + r = paginated_get('https://slack.com/api/conversations.replies', params, response_url, combine_key='messages') + replies.append(r) -def user_list_to_names(user_dict): - return {x['id']: {'name': x['name'], 'real_name': x['real_name']} for x in user_dict} - - -def channel_history_to_text(msgs_dict, users): - messages = [x for x in msgs_dict['messages'] if x['type'] == 'message'] # files are also messages - body = 'Team ID: %s\nTeam Domain: %s\nChannel ID: %s\nChannel Name: %s\n\n' % \ - (msgs_dict['team_id'], msgs_dict['team_domain'], msgs_dict['channel_id'], msgs_dict['channel_name']) - body += '%s\n %s Messages\n%s\n\n' % ('=' * 16, len(messages), '=' * 16) - for msg in messages: - usr = users[msg['user']] if 'user' in msg else {'name': '', 'real_name': 'none'} - ts = datetime.fromtimestamp(round(float(msg['ts']))).strftime('%m-%d-%Y %H:%M:%S') - text = msg['text'] if msg['text'].strip() != "" else "[no message content]" - for u in users.keys(): - # if u in text: - # print(u) - text = str(text).replace('<@%s>' % u, '<@%s> (%s)' % (u, users[u]['name'])) - entry = "Message at %s\nUser: %s (%s)\n%s" % (ts, usr['name'], usr['real_name'], text) - if 'reactions' in msg: - rxns = msg['reactions'] - entry += "\nReactions: " + ', '.join('%s 
(%s)' % (x['name'], ', '.join( - users[u]['name'] for u in x['users'])) for x in rxns) - if 'files' in msg: - files = msg['files'] - entry += "\nFiles:\n" + '\n'.join(' - %s, %s' % (f['name'], f['url_private_download']) for f in files) - - body += entry.strip() + '\n\n%s\n\n' % ('=' * 16) - - return body + return replies # Flask routes @@ -143,32 +104,37 @@ def export_channel(): try: team_id = data['team_id'] team_domain = data['team_domain'] - channel_id = data['channel_id'] - channel_name = data['channel_name'] + ch_id = data['channel_id'] + ch_name = data['channel_name'] response_url = data['response_url'] command_args = data['text'] except KeyError: - return Response("Sorry! I got an unexpected response from Slack (KeyError)."), 200 + return Response("Sorry! I got an unexpected response (KeyError)."), 200 post_response(response_url, "Retrieving history for this channel...") - all_messages = { - 'team_id': team_id, - 'team_domain': team_domain, - 'channel_id': channel_id, - 'channel_name': channel_name, - 'messages': channel_history(channel_id, response_url) - } + ch_hist = channel_history(ch_id, response_url) - filename = "%s-%s-%s.json" % (team_domain, channel_id, str(uuid4().hex)[:6]) - filepath = os.path.join(app.root_path, 'exports', filename) + export_mode = str(command_args).lower() + + exports_subdir = 'exports' + exports_dir = os.path.join(app.root_path, exports_subdir) + file_ext = '.txt' if export_mode == 'text' else '.json' + filename = "%s-ch_%s-%s%s" % (team_domain, ch_id, str(uuid4().hex)[:6], file_ext) + filepath = os.path.join(exports_dir, filename) loc = urljoin(request.url_root, 'download/%s' % filename) + if not os.path.isdir(exports_dir): + os.makedirs(exports_dir, exist_ok=True) + with open(filepath, mode='w') as f: - if str(command_args).lower() == 'text': - users = user_list_to_names(user_list(team_id, response_url)) - f.write(channel_history_to_text(all_messages, users)) + if export_mode == 'text': + num_msgs = len(ch_hist) + sep = '=' * 
24 + header_str = 'Channel Name: %s\nChannel ID: %s\n%s Messages\n%s\n\n' % (ch_name, ch_id, num_msgs, sep) + data_ch = header_str + parse_channel_history(ch_hist, user_list(team_id, response_url)) + f.write(data_ch) else: - json.dump(all_messages, f, indent=4) + json.dump(ch_hist, f, indent=4) post_response(response_url, "Done! This channel's history is available for download here (note that this link " "is single-use): %s" % loc) @@ -176,8 +142,60 @@ def export_channel(): return Response(), 200 +@app.route('/slack/export-replies', methods=['POST']) +def export_replies(): + data = request.form + + try: + team_id = data['team_id'] + team_domain = data['team_domain'] + ch_id = data['channel_id'] + ch_name = data['channel_name'] + response_url = data['response_url'] + command_args = data['text'] + except KeyError: + return Response("Sorry! I got an unexpected response (KeyError)."), 200 + + post_response(response_url, "Retrieving reply threads for this channel...") + print(ch_id) + ch_hist = channel_history(ch_id, response_url) + print(ch_hist) + ch_replies = channel_replies([x['ts'] for x in ch_hist if 'reply_count' in x], ch_id, response_url) + + export_mode = str(command_args).lower() + + exports_subdir = 'exports' + exports_dir = os.path.join(app.root_path, exports_subdir) + file_ext = '.txt' if export_mode == 'text' else '.json' + filename = "%s-re_%s-%s%s" % (team_domain, ch_id, str(uuid4().hex)[:6], file_ext) + filepath = os.path.join(exports_dir, filename) + loc = urljoin(request.url_root, 'download/%s' % filename) + + if export_mode == 'text': + header_str = 'Threads in: %s\n%s Messages' % (ch_name, len(ch_replies)) + data_replies = parse_replies(ch_replies, user_list(team_id, response_url)) + sep = '=' * 24 + data_replies = '%s\n%s\n\n%s' % (header_str, sep, data_replies) + else: + data_replies = ch_replies + + if not os.path.isdir(exports_dir): + os.makedirs(exports_dir, exist_ok=True) + + with open(filepath, mode='w') as f: + if export_mode == 'text': + 
f.write(data_replies) + else: + json.dump(data_replies, f, indent=4) + + post_response(response_url, "Done! This channel's reply threads are available for download here (note that this " + "link is single-use): %s" % loc) + + return Response(), 200 + + @app.route('/download/') -def download(filename, mimetype='application/json'): +def download(filename): path = os.path.join(app.root_path, 'exports', filename) def generate(): @@ -185,10 +203,12 @@ def download(filename, mimetype='application/json'): yield from f os.remove(path) + mimetype = 'text/plain' if os.path.splitext(filename)[-1] == '.txt' else 'application/json' + r = app.response_class(generate(), mimetype=mimetype) r.headers.set('Content-Disposition', 'attachment', filename=filename) return r if __name__ == '__main__': - app.run(debug=True) + app.run(debug=False) diff --git a/exporter.py b/exporter.py new file mode 100644 index 0000000..257ee7e --- /dev/null +++ b/exporter.py @@ -0,0 +1,280 @@ +import os +import sys +import requests +import json +from datetime import datetime +import argparse + +from dotenv import load_dotenv +load_dotenv(os.path.join(os.path.dirname(__file__), '.env')) + + +# pagination handling + +def get_at_cursor(url, params, cursor=None): + if cursor is not None: + params['cursor'] = cursor + + r = requests.get(url, params=params) + if r.status_code != 200: + print("ERROR: %s %s" % (r.status_code, r.reason)) + sys.exit(1) + d = r.json() + + try: + if d['ok'] is False: + print("I encountered an error: %s" % d) + sys.exit(1) + + next_cursor = None + if 'response_metadata' in d and 'next_cursor' in d['response_metadata']: + next_cursor = d['response_metadata']['next_cursor'] + if str(next_cursor).strip() == '': + next_cursor = None + + return next_cursor, d + + except KeyError as e: + print("Something went wrong: %s." 
% e) + return None, [] + + +def paginated_get(url, params, combine_key=None): + next_cursor = None + result = [] + while True: + next_cursor, data = get_at_cursor(url, params, cursor=next_cursor) + result.extend(data) if combine_key is None else result.extend(data[combine_key]) + if next_cursor is None: + break + + return result + + +# GET requests + +def channel_list(team_id=None): + params = { + 'token': os.environ['SLACK_USER_TOKEN'], + 'team_id': team_id, + 'types': 'public_channel,private_channel,mpim,im', + 'limit': 200 + } + + return paginated_get('https://slack.com/api/conversations.list', params, combine_key='channels') + + +def channel_history(channel_id): + params = { + 'token': os.environ['SLACK_USER_TOKEN'], + 'channel': channel_id, + 'limit': 200 + } + + return paginated_get('https://slack.com/api/conversations.history', params, combine_key='messages') + + +def user_list(team_id=None): + params = { + 'token': os.environ['SLACK_USER_TOKEN'], + 'limit': 200, + 'team_id': team_id + } + + return paginated_get('https://slack.com/api/users.list', params, combine_key='members') + + +def channel_replies(timestamps, channel_id): + replies = [] + for timestamp in timestamps: + params = { + 'token': os.environ['SLACK_USER_TOKEN'], + 'channel': channel_id, + 'ts': timestamp, + 'limit': 200 + } + replies.append(paginated_get('https://slack.com/api/conversations.replies', params, combine_key='messages')) + + return replies + + +# parsing + +def parse_channel_list(channels, users): + result = '' + for channel in channels: + ch_id = channel['id'] + ch_name = channel['name'] if 'name' in channel else '' + ch_private = 'private ' if 'is_private' in channel and channel['is_private'] else '' + if 'is_im' in channel and channel['is_im']: + ch_type = 'direct_message' + elif 'is_mpim' in channel and channel['is_mpim']: + ch_type = 'multiparty-direct_message' + elif 'is_group' in channel and channel['is_group']: + ch_type = 'group' + else: + ch_type = 'channel' + if 'creator'
in channel: + ch_ownership = 'created by %s' % name_from_uid(channel['creator'], users) + elif 'user' in channel: + ch_ownership = 'with %s' % name_from_uid(channel['user'], users) + else: + ch_ownership = '' + ch_name = ' %s:' % ch_name if ch_name.strip() != '' else ch_name + result += '[%s]%s %s%s %s\n' % (ch_id, ch_name, ch_private, ch_type, ch_ownership) + + return result + + +def name_from_uid(user_id, users, real=False): + for user in users: + if user['id'] == user_id: + return user['real_name'] if real else user['name'] + return '[null user]' + + +def name_from_ch_id(channel_id, channels): + for channel in channels: + if channel['id'] == channel_id: + return (channel['user'], 'Direct Message') if 'user' in channel else (channel['name'], 'Channel') + return '[null channel]', 'Channel' + + +def parse_user_list(users): + result = '' + for u in users: + entry = '[%s] %s (%s), %s' % (u['id'], u['name'], u['real_name'], u['tz']) + u_type = '' + if 'is_admin' in u and u['is_admin']: + u_type += 'admin|' + if 'is_owner' in u and u['is_owner']: + u_type += 'owner|' + if 'is_primary_owner' in u and u['is_primary_owner']: + u_type += 'primary_owner|' + if 'is_restricted' in u and u['is_restricted']: + u_type += 'restricted|' + if 'is_ultra_restricted' in u and u['is_ultra_restricted']: + u_type += 'ultra_restricted|' + if 'is_bot' in u and u['is_bot']: + u_type += 'bot|' + if 'is_app_user' in u and u['is_app_user']: + u_type += 'app_user|' + u_type = u_type[:-1] if u_type.endswith('|') else u_type + entry += ', ' if u_type.strip() != '' else '' + entry += '%s\n' % u_type + result += entry + + return result + + +def parse_channel_history(msgs, users, check_thread=False): + if 'messages' in msgs: + msgs = msgs['messages'] + + messages = [x for x in msgs if x['type'] == 'message'] # files are also messages + body = '' + for msg in messages: + if 'user' in msg: + usr = {'name': name_from_uid(msg['user'], users), 'real_name': name_from_uid(msg['user'], users, True)} + else: + usr =
{'name': '', 'real_name': 'none'} + + timestamp = datetime.fromtimestamp(round(float(msg['ts']))).strftime('%m-%d-%Y %H:%M:%S') + text = msg['text'] if msg['text'].strip() != "" else "[no message content]" + for u in [x['id'] for x in users]: + text = str(text).replace('<@%s>' % u, '<@%s> (%s)' % (u, name_from_uid(u, users))) + + entry = "Message at %s\nUser: %s (%s)\n%s" % (timestamp, usr['name'], usr['real_name'], text) + if 'reactions' in msg: + rxns = msg['reactions'] + entry += "\nReactions: " + ', '.join('%s (%s)' % (x['name'], ', '.join( + name_from_uid(u, users) for u in x['users'])) for x in rxns) + if 'files' in msg: + files = msg['files'] + entry += "\nFiles:\n" + '\n'.join(' - %s, %s' % (f['name'], f['url_private_download']) for f in files) + + entry += '\n\n%s\n\n' % ('*' * 24) + + if check_thread and 'parent_user_id' in msg: + entry = '\n'.join('\t%s' % x for x in entry.split('\n')) + + body += entry.rstrip('\t') # get rid of any extra tabs between trailing newlines + + return body + + +def parse_replies(threads, users): + body = '' + for thread in threads: + body += parse_channel_history(thread, users, check_thread=True) + body += '\n' + + return body + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-o', help="Directory in which to save output files (if left blank, prints to stdout)") + parser.add_argument('--lc', action='store_true', help="List all conversations in your workspace") + parser.add_argument('--lu', action='store_true', help="List all users in your workspace") + parser.add_argument('--json', action='store_true', help="Give the requested output in raw JSON format (no parsing)") + parser.add_argument('-c', action='store_true', help="Get history for all accessible conversations") + parser.add_argument('-r', action='store_true', help="Get reply threads for all accessible conversations") + a = parser.parse_args() + + ts = str(datetime.strftime(datetime.now(), '%m-%d-%Y_%H%M%S')) + + def save(data, 
filename): + if a.o is None: + print(data) + else: + out_dir_parent = os.path.abspath(os.path.expanduser(os.path.expandvars(a.o))) + out_dir = os.path.join(out_dir_parent, 'slack_export_%s' % ts) + filename = filename + '.json' if a.json else filename + '.txt' + os.makedirs(out_dir, exist_ok=True) + full_filepath = os.path.join(out_dir, filename) + print("Writing output to %s" % full_filepath) + with open(full_filepath, mode='w') as f: + if a.json: + json.dump(data, f, indent=4) + else: + f.write(data) + + def save_replies(channel_hist, channel_id, users): + ch_replies = channel_replies([x['ts'] for x in channel_hist if 'reply_count' in x], channel_id) + if a.json: + data_replies = ch_replies + else: + ch_name, ch_type = name_from_ch_id(channel_id, ch_list) + header_str = 'Threads in %s: %s\n%s Messages' % (ch_type, ch_name, len(ch_replies)) + data_replies = parse_replies(ch_replies, users) + sep = '=' * 24 + data_replies = '%s\n%s\n\n%s' % (header_str, sep, data_replies) + save(data_replies, 'channel-replies_%s' % channel_id) + + if a.lc: + data = channel_list() if a.json else parse_channel_list(channel_list(), user_list()) + save(data, 'channel_list') + if a.lu: + data = user_list() if a.json else parse_user_list(user_list()) + save(data, 'user_list') + if a.c or a.r: + ch_list, users = channel_list(), user_list() + if a.c: + for ch_id in [x['id'] for x in ch_list]: + ch_hist = channel_history(ch_id) + if a.json: + data_ch = ch_hist + else: + data_ch = parse_channel_history(ch_hist, users) + ch_name, ch_type = name_from_ch_id(ch_id, ch_list) + header_str = '%s Name: %s' % (ch_type, ch_name) + sep = '=' * 24 + data_ch = 'Channel ID: %s\n%s\n%s Messages\n%s\n\n' % (ch_id, header_str, len(ch_hist), sep) + data_ch + save(data_ch, 'channel_%s' % ch_id) + if a.r: + save_replies(ch_hist, ch_id, users) + # elif, since we want to avoid asking for channel_history twice + elif a.r: + for ch_id in [x['id'] for x in ch_list]: + save_replies(channel_history(ch_id), ch_id, users) diff
--git a/requirements.txt b/requirements.txt index a4d6ba2..a032b5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -requests==2.24.0 -Flask==1.1.2 -python-dotenv==0.15.0 -slackclient==2.9.3 +Flask~=1.1.2 +requests~=2.24.0 +python-dotenv~=0.15.0 \ No newline at end of file