Added exporter.py and documentation

This commit is contained in:
Seb Seager 2020-12-29 16:37:57 -06:00
parent 23385c2f01
commit 8c4550f307
5 changed files with 439 additions and 82 deletions

3
.gitignore vendored
View file

@ -1 +1,2 @@
/.env
.env
exports

View file

@ -1,5 +1,62 @@
# slack-exporter
A Slack app for exporting messages and file attachments from public and private channels.
A Slack bot and standalone script for exporting messages and file attachments from public and private channels, using Slack's new Conversations API.
Note that Slack provides a similar service for workspace admins at [https://my.slack.com/services/export](https://my.slack.com/services/export) (where `my` can be replaced with your full workspace name to access workspace different than your default). However, it can only access public channels, while `slack-exporter` can be added to any channel.
A similar service is provided by Slack for workspace admins at [https://my.slack.com/services/export](https://my.slack.com/services/export) (where `my` can be replaced with your full workspace name to refer to a workspace different from your default). However, it can only access public channels, while `slack-exporter` can retrieve data from any channel accessible to your user account.
## Authentication with Slack
There are two ways to use `slack-exporter` (detailed below). Both require a Slack API token to be able to communicate with your workspace.
1. Visit [https://api.slack.com/apps/](https://api.slack.com/apps/) and sign in to your workspace.
2. Click `Create New App`, enter a name (e.g., `Slack Exporter`), and select your workspace.
3. In the left-hand panel, navigate to `OAuth & Permissions`, and scroll to `User Token Scopes` (**not** `Bot Token Scopes`).
4. Select the following permissions:
- `channels:read`, `channels:history`
- `groups:read`, `groups:history`
- `mpim:read`, `mpim:history`
- `im:read`, `im:history`
- `users:read`
5. Select `Install to Workspace` at the top of that page (or `Reinstall to Workspace` if you have done this previously) and accept at the prompt.
6. Copy the `OAuth Access Token` (which will generally start with `xoxp` for user-level permissions).
## Usage
### As a standalone script
`exporter.py` can create an archive of all conversation history in your workspace which is accessible to your user account.
1. Run the following (replacing the value with the user token you obtained in the [Authentication with Slack](#authentication-with-slack) section above).
```shell script
export SLACK_USER_TOKEN=xoxp-xxxxxxxxxxxxx...
```
2. Run `python exporter.py --help` to view the available export options.
### As a Slack bot
`bot.py` is a Slack bot that responds to "slash commands" in Slack channels (e.g., `/export-channel`). To connect the bot to the Slack app generated in [Authentication with Slack](#authentication-with-slack), create a file named `.env` in the root directory of this repo, and add the following line:
```text
SLACK_USER_TOKEN = xoxp-xxxxxxxxxxxxx...
```
Save this file and run the Flask application in `bot.py` such that the application is exposed to the Internet. This can be done via a web server (e.g., Heroku), as well as via the ngrok service, which assigns your `localhost` server a public URL.
To use the ngrok method:
1. [Download](https://ngrok.com/download) the appropriate binary.
2. Run `python bot.py`
3. Run the ngrok binary with `path/to/ngrok http 5000`, where `5000` is the port on which the Flask application (step 2) is running. Copy the forwarding HTTPS address provided.
Return to the Slack app you created in [Authentication with Slack](#authentication-with-slack) and navigate to the `Slash Commands` page in the sidebar. Create the following slash commands (one for each applicable Flask route in `bot.py`):
| Command | Request URL | Arguments | Example Usage |
|-----------------|-------------------------------------------|--------------|----------------------|
| /export-channel | https://`[host_url]`/slack/export-channel | json \| text | /export-channel text |
| /export-replies | https://`[host_url]`/slack/export-replies | json \| text | /export-replies json |
where, if using ngrok, `[host_url]` would be replaced with something like `xxxxxxxxxxxx.ngrok.io` (the forwarding address without the leading `https://`, which is already part of the request URLs above).
Navigate back to `OAuth & Permissions` and click `(Re)install to Workspace` to add these slash commands to the workspace.

170
bot.py
View file

@ -1,42 +1,18 @@
import os
import ssl
import slack
from slack.errors import SlackApiError
import requests
from dotenv import load_dotenv
from flask import Flask, request, Response
from urllib.parse import urljoin
from uuid import uuid4
import json
from datetime import datetime
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
from dotenv import load_dotenv
from exporter import parse_replies, parse_channel_history
app = Flask(__name__)
load_dotenv(os.path.join(app.root_path, '.env'))
client = slack.WebClient(token=os.environ['SLACK_BOT_TOKEN'], ssl=ssl_context)
# chat write interactions
def send_to_channel(channel_id, text):
try:
client.chat_postMessage(channel=channel_id, text=text)
except SlackApiError as e:
print(e)
pass
def send_to_user(user_id, text):
try:
client.chat_postMessage(channel=user_id, text=text, as_user=True)
except SlackApiError as e:
print(e)
pass
def post_response(response_url, text):
requests.post(response_url, json={'text': text})
@ -72,7 +48,10 @@ def paginated_get(url, params, response_url, combine_key=None):
result = []
while True:
next_cursor, data = get_at_cursor(url, params, response_url, cursor=next_cursor)
result.extend(data) if combine_key is None else result.extend(data[combine_key])
try:
result.extend(data) if combine_key is None else result.extend(data[combine_key])
except KeyError:
post_response(response_url, "Sorry! I got an unexpected response (KeyError).")
if next_cursor is None:
break
@ -101,37 +80,19 @@ def user_list(team_id, response_url):
return paginated_get('https://slack.com/api/users.list', params, response_url, combine_key='members')
# parsing
def channel_replies(timestamps, channel_id, response_url):
replies = []
for timestamp in timestamps:
params = {
'token': os.environ['SLACK_USER_TOKEN'],
'channel': channel_id,
'ts': timestamp,
'limit': 200
}
r = paginated_get('https://slack.com/api/conversations.replies', params, response_url, combine_key='messages')
replies.append(r)
def user_list_to_names(user_dict):
return {x['id']: {'name': x['name'], 'real_name': x['real_name']} for x in user_dict}
def channel_history_to_text(msgs_dict, users):
messages = [x for x in msgs_dict['messages'] if x['type'] == 'message'] # files are also messages
body = 'Team ID: %s\nTeam Domain: %s\nChannel ID: %s\nChannel Name: %s\n\n' % \
(msgs_dict['team_id'], msgs_dict['team_domain'], msgs_dict['channel_id'], msgs_dict['channel_name'])
body += '%s\n %s Messages\n%s\n\n' % ('=' * 16, len(messages), '=' * 16)
for msg in messages:
usr = users[msg['user']] if 'user' in msg else {'name': '', 'real_name': 'none'}
ts = datetime.fromtimestamp(round(float(msg['ts']))).strftime('%m-%d-%Y %H:%M:%S')
text = msg['text'] if msg['text'].strip() != "" else "[no message content]"
for u in users.keys():
# if u in text:
# print(u)
text = str(text).replace('<@%s>' % u, '<@%s> (%s)' % (u, users[u]['name']))
entry = "Message at %s\nUser: %s (%s)\n%s" % (ts, usr['name'], usr['real_name'], text)
if 'reactions' in msg:
rxns = msg['reactions']
entry += "\nReactions: " + ', '.join('%s (%s)' % (x['name'], ', '.join(
users[u]['name'] for u in x['users'])) for x in rxns)
if 'files' in msg:
files = msg['files']
entry += "\nFiles:\n" + '\n'.join(' - %s, %s' % (f['name'], f['url_private_download']) for f in files)
body += entry.strip() + '\n\n%s\n\n' % ('=' * 16)
return body
return replies
# Flask routes
@ -143,32 +104,37 @@ def export_channel():
try:
team_id = data['team_id']
team_domain = data['team_domain']
channel_id = data['channel_id']
channel_name = data['channel_name']
ch_id = data['channel_id']
ch_name = data['channel_name']
response_url = data['response_url']
command_args = data['text']
except KeyError:
return Response("Sorry! I got an unexpected response from Slack (KeyError)."), 200
return Response("Sorry! I got an unexpected response (KeyError)."), 200
post_response(response_url, "Retrieving history for this channel...")
all_messages = {
'team_id': team_id,
'team_domain': team_domain,
'channel_id': channel_id,
'channel_name': channel_name,
'messages': channel_history(channel_id, response_url)
}
ch_hist = channel_history(ch_id, response_url)
filename = "%s-%s-%s.json" % (team_domain, channel_id, str(uuid4().hex)[:6])
filepath = os.path.join(app.root_path, 'exports', filename)
export_mode = str(command_args).lower()
exports_subdir = 'exports'
exports_dir = os.path.join(app.root_path, exports_subdir)
file_ext = '.txt' if export_mode == 'text' else '.json'
filename = "%s-ch_%s-%s%s" % (team_domain, ch_id, str(uuid4().hex)[:6], file_ext)
filepath = os.path.join(exports_dir, filename)
loc = urljoin(request.url_root, 'download/%s' % filename)
if not os.path.isdir(exports_dir):
os.makedirs(exports_dir, exist_ok=True)
with open(filepath, mode='w') as f:
if str(command_args).lower() == 'text':
users = user_list_to_names(user_list(team_id, response_url))
f.write(channel_history_to_text(all_messages, users))
if export_mode == 'text':
num_msgs = len(ch_hist)
sep = '=' * 24
header_str = 'Channel Name: %s\nChannel ID: %s\n%s Messages\n%s\n\n' % (ch_name, ch_id, num_msgs, sep)
data_ch = header_str + parse_channel_history(ch_hist, user_list(team_id, response_url))
f.write(data_ch)
else:
json.dump(all_messages, f, indent=4)
json.dump(ch_hist, f, indent=4)
post_response(response_url, "Done! This channel's history is available for download here (note that this link "
"is single-use): %s" % loc)
@ -176,8 +142,60 @@ def export_channel():
return Response(), 200
@app.route('/slack/export-replies', methods=['POST'])
def export_replies():
    """Handle the export-replies slash command.

    Reads the slash-command form fields, collects the reply thread of every
    message in the channel that has a ``reply_count``, writes the result to a
    uniquely named file in the ``exports`` directory (plain text when the
    command argument is ``text``, JSON otherwise) and posts the download link
    to the command's ``response_url``.

    Always answers with HTTP 200; a missing form field is reported in the
    response body.
    """
    form = request.form
    try:
        team_id = form['team_id']
        team_domain = form['team_domain']
        ch_id = form['channel_id']
        ch_name = form['channel_name']
        response_url = form['response_url']
        command_args = form['text']
    except KeyError:
        return Response("Sorry! I got an unexpected response (KeyError)."), 200

    post_response(response_url, "Retrieving reply threads for this channel...")

    # NOTE: the channel ID and full history used to be printed here as
    # debugging output; that leaked message contents into the server log.
    ch_hist = channel_history(ch_id, response_url)
    thread_roots = [x['ts'] for x in ch_hist if 'reply_count' in x]
    ch_replies = channel_replies(thread_roots, ch_id, response_url)

    export_mode = str(command_args).lower()
    as_text = export_mode == 'text'
    exports_subdir = 'exports'
    exports_dir = os.path.join(app.root_path, exports_subdir)
    file_ext = '.txt' if as_text else '.json'
    # a short random suffix keeps repeated exports of one channel apart
    filename = "%s-re_%s-%s%s" % (team_domain, ch_id, str(uuid4().hex)[:6], file_ext)
    filepath = os.path.join(exports_dir, filename)
    loc = urljoin(request.url_root, 'download/%s' % filename)

    if as_text:
        header_str = 'Threads in: %s\n%s Messages' % (ch_name, len(ch_replies))
        data_replies = parse_replies(ch_replies, user_list(team_id, response_url))
        sep = '=' * 24
        data_replies = '%s\n%s\n\n%s' % (header_str, sep, data_replies)
    else:
        data_replies = ch_replies

    os.makedirs(exports_dir, exist_ok=True)

    # explicit UTF-8 so emoji and non-ASCII text do not depend on the locale
    with open(filepath, mode='w', encoding='utf-8') as f:
        if as_text:
            f.write(data_replies)
        else:
            json.dump(data_replies, f, indent=4)

    post_response(response_url, "Done! This channel's reply threads are available for download here (note that this "
                                "link is single-use): %s" % loc)

    return Response(), 200
@app.route('/download/<filename>')
def download(filename, mimetype='application/json'):
def download(filename):
path = os.path.join(app.root_path, 'exports', filename)
def generate():
@ -185,10 +203,12 @@ def download(filename, mimetype='application/json'):
yield from f
os.remove(path)
mimetype = 'text/plain' if os.path.splitext(filename)[-1] == '.txt' else 'application/json'
r = app.response_class(generate(), mimetype=mimetype)
r.headers.set('Content-Disposition', 'attachment', filename=filename)
return r
if __name__ == '__main__':
app.run(debug=True)
app.run(debug=False)

280
exporter.py Normal file
View file

@ -0,0 +1,280 @@
import os
import sys
import requests
import json
from datetime import datetime
import argparse
from dotenv import load_dotenv
load_dotenv(os.path.join(os.path.dirname(__file__), '.env'))
# pagination handling
def get_at_cursor(url, params, cursor=None):
    """Fetch a single page from a Slack Web API endpoint.

    When ``cursor`` is given it is stored in ``params`` (the caller's dict is
    modified) before the GET request is sent.  A non-200 HTTP status, or a
    payload whose ``ok`` field is ``False``, prints a message and exits the
    process with status 1.

    Returns a ``(next_cursor, payload)`` tuple, where ``next_cursor`` is
    ``None`` unless the payload carries a non-blank cursor.  If the payload
    has no ``ok`` field, a message is printed and ``(None, [])`` is returned.
    """
    if cursor is not None:
        params['cursor'] = cursor

    response = requests.get(url, params=params)
    if response.status_code != 200:
        print("ERROR: %s %s" % (response.status_code, response.reason))
        sys.exit(1)

    payload = response.json()
    try:
        request_failed = payload['ok'] is False
    except KeyError as e:
        print("Something went wrong: %s." % e)
        return None, []

    if request_failed:
        print("I encountered an error: %s" % payload)
        sys.exit(1)

    metadata = payload['response_metadata'] if 'response_metadata' in payload else {}
    following = metadata['next_cursor'] if 'next_cursor' in metadata else None
    # a blank cursor string means there are no further pages
    if str(following).strip() == '':
        following = None
    return following, payload
def paginated_get(url, params, combine_key=None):
    """Collect every page of a cursor-paginated endpoint into one list.

    Calls ``get_at_cursor`` repeatedly, feeding each returned cursor into the
    next request, until no cursor is returned.

    If ``combine_key`` is ``None`` each page is extended into the result as
    is; otherwise only ``page[combine_key]`` is.  A page that does not
    provide ``combine_key`` (including the ``[]`` that ``get_at_cursor``
    returns after reporting a malformed payload) is reported and skipped
    rather than raising, so whatever was gathered so far is still returned.
    """
    results = []
    cursor = None
    while True:
        cursor, page = get_at_cursor(url, params, cursor=cursor)
        if combine_key is None:
            results.extend(page)
        elif isinstance(page, dict) and combine_key in page:
            results.extend(page[combine_key])
        else:
            print("Something went wrong: response has no '%s' field." % combine_key)
        if cursor is None:
            break
    return results
# GET requests
def channel_list(team_id=None):
    """Return all conversations from ``conversations.list``.

    Requests public channels, private channels, multi-party DMs and DMs,
    200 per page, authenticating with the ``SLACK_USER_TOKEN`` environment
    variable.  ``team_id`` is passed through to the API unchanged.
    """
    query = dict(
        token=os.environ['SLACK_USER_TOKEN'],
        team_id=team_id,
        types='public_channel,private_channel,mpim,im',
        limit=200,
    )
    endpoint = 'https://slack.com/api/conversations.list'
    return paginated_get(endpoint, query, combine_key='channels')
def channel_history(channel_id):
    """Return every message of ``channel_id`` from ``conversations.history``.

    Pages are requested 200 messages at a time using the token in the
    ``SLACK_USER_TOKEN`` environment variable.
    """
    query = dict(
        token=os.environ['SLACK_USER_TOKEN'],
        channel=channel_id,
        limit=200,
    )
    endpoint = 'https://slack.com/api/conversations.history'
    return paginated_get(endpoint, query, combine_key='messages')
def user_list(team_id=None):
    """Return all member records from ``users.list``.

    Pages are requested 200 members at a time using the token in the
    ``SLACK_USER_TOKEN`` environment variable.  ``team_id`` is passed through
    to the API unchanged.
    """
    query = dict(
        token=os.environ['SLACK_USER_TOKEN'],
        limit=200,
        team_id=team_id,
    )
    endpoint = 'https://slack.com/api/users.list'
    return paginated_get(endpoint, query, combine_key='members')
def channel_replies(timestamps, channel_id):
    """Fetch one reply thread per timestamp from ``conversations.replies``.

    Returns a list with one entry per item of ``timestamps`` (in the same
    order); each entry is the list of messages returned for that thread.
    """
    endpoint = 'https://slack.com/api/conversations.replies'

    def fetch_thread(thread_ts):
        # a fresh parameter dict per thread, so no cursor leaks between threads
        query = dict(
            token=os.environ['SLACK_USER_TOKEN'],
            channel=channel_id,
            ts=thread_ts,
            limit=200,
        )
        return paginated_get(endpoint, query, combine_key='messages')

    return [fetch_thread(thread_ts) for thread_ts in timestamps]
# parsing
def parse_channel_list(channels, users):
    """Render a list of conversation records as one text line per conversation.

    Each line has the form ``[id] name: [private ]type ownership`` where

    * ``type`` is ``direct_message``, ``multiparty-direct_message``,
      ``group`` or ``channel``, chosen from the ``is_im`` / ``is_mpim`` /
      ``is_group`` flags (first truthy flag wins, ``channel`` otherwise);
    * ``ownership`` is ``created by <name>`` when the record has a
      ``creator``, ``with <name>`` when it has a ``user``, and empty
      otherwise (names are resolved through ``name_from_uid``).

    Returns the concatenated lines, or ``''`` for an empty list.
    """
    lines = []
    for channel in channels:
        ch_id = channel['id']
        ch_name = channel.get('name', '')
        ch_private = 'private ' if channel.get('is_private') else ''

        if channel.get('is_im'):
            ch_type = 'direct_message'
        elif channel.get('is_mpim'):
            ch_type = 'multiparty-direct_message'
        elif channel.get('is_group'):
            # the flag is named 'is_group'; testing for a 'group' key never matched
            ch_type = 'group'
        else:
            ch_type = 'channel'

        if 'creator' in channel:
            ch_ownership = 'created by %s' % name_from_uid(channel['creator'], users)
        elif 'user' in channel:
            ch_ownership = 'with %s' % name_from_uid(channel['user'], users)
        else:
            ch_ownership = ''

        # unnamed conversations (e.g. DMs) get no "name:" label at all
        ch_label = ' %s:' % ch_name if ch_name.strip() != '' else ch_name
        lines.append('[%s]%s %s%s %s\n' % (ch_id, ch_label, ch_private, ch_type, ch_ownership))
    return ''.join(lines)
def name_from_uid(user_id, users, real=False):
    """Look up a user's name by ID in a list of user records.

    Returns the ``real_name`` of the first record whose ``id`` equals
    ``user_id`` when ``real`` is truthy, its ``name`` otherwise, and
    ``'[null user]'`` when no record matches.
    """
    field = 'real_name' if real else 'name'
    match = next((record for record in users if record['id'] == user_id), None)
    if match is None:
        return '[null user]'
    return match[field]
def name_from_ch_id(channel_id, channels):
    """Look up a conversation's display name and kind by ID.

    Returns a ``(name, kind)`` tuple for the first record whose ``id`` equals
    ``channel_id``: ``(record['user'], 'Direct Message')`` when the record
    has a ``user`` field, ``(record['name'], 'Channel')`` otherwise.

    When no record matches, ``('[null channel]', 'Channel')`` is returned so
    that callers can always unpack two values (a bare string here would make
    ``name, kind = name_from_ch_id(...)`` raise ``ValueError``).
    """
    for channel in channels:
        if channel['id'] == channel_id:
            if 'user' in channel:
                return channel['user'], 'Direct Message'
            return channel['name'], 'Channel'
    return '[null channel]', 'Channel'
def parse_user_list(users):
    """Render a list of user records as one text line per user.

    Each line has the form ``[id] name (real_name), tz, flags`` where
    ``flags`` is a ``|``-separated list of the truthy role flags of the
    record (admin, owner, primary_owner, restricted, ultra_restricted, bot,
    app_user, in that order).  The ``(real_name)``, ``tz`` and ``flags``
    parts are each left out when the record does not provide them, instead
    of raising ``KeyError``.

    Returns the concatenated lines, or ``''`` for an empty list.
    """
    # (record key, label) pairs in output order
    role_flags = (
        ('is_admin', 'admin'),
        ('is_owner', 'owner'),
        ('is_primary_owner', 'primary_owner'),
        ('is_restricted', 'restricted'),
        ('is_ultra_restricted', 'ultra_restricted'),
        ('is_bot', 'bot'),
        ('is_app_user', 'app_user'),
    )

    lines = []
    for u in users:
        entry = '[%s] %s' % (u['id'], u['name'])
        if 'real_name' in u:
            entry += ' (%s)' % u['real_name']
        if 'tz' in u:
            entry += ', %s' % u['tz']
        u_type = '|'.join(label for key, label in role_flags if u.get(key))
        if u_type:
            entry += ', %s' % u_type
        lines.append(entry + '\n')
    return ''.join(lines)
def parse_channel_history(msgs, users, check_thread=False):
    """Render a list of Slack messages as human-readable text.

    ``msgs`` is either a list of message records or a dict holding that list
    under ``'messages'``; only records whose ``type`` is ``'message'`` are
    rendered.  ``users`` is a list of user records used to resolve user IDs
    (author, ``<@ID>`` mentions, reaction authors) to names; unknown IDs are
    shown as ``[null user]``.

    Each entry shows the timestamp (formatted in the local timezone), the
    author, the text with mentions annotated, and any reactions and files,
    followed by a line of asterisks.  With ``check_thread`` set, entries that
    carry ``parent_user_id`` (thread replies) are indented by one tab.

    Messages without ``text`` and files without ``name`` or
    ``url_private_download`` are rendered with placeholders instead of
    raising ``KeyError``.

    Returns the concatenated entries, or ``''`` when nothing is rendered.
    """
    if isinstance(msgs, dict) and 'messages' in msgs:
        msgs = msgs['messages']

    # Index users once instead of scanning the list for every lookup; the
    # first record wins when an ID appears more than once.
    users_by_id = {}
    for record in users:
        users_by_id.setdefault(record['id'], record)

    def short_name(uid):
        return users_by_id[uid]['name'] if uid in users_by_id else '[null user]'

    def full_name(uid):
        if uid not in users_by_id:
            return '[null user]'
        return users_by_id[uid].get('real_name', '[no full name]')

    divider = '\n\n%s\n\n' % ('*' * 24)
    entries = []
    for msg in msgs:
        if msg.get('type') != 'message':  # files are also messages
            continue

        if 'user' in msg:
            author, author_full = short_name(msg['user']), full_name(msg['user'])
        else:
            author, author_full = '', 'none'

        timestamp = datetime.fromtimestamp(round(float(msg['ts']))).strftime('%m-%d-%Y %H:%M:%S')

        text = str(msg.get('text') or '')
        if text.strip() == '':
            text = "[no message content]"
        # annotate raw <@ID> mentions with the user's name
        for uid in users_by_id:
            text = text.replace('<@%s>' % uid, '<@%s> (%s)' % (uid, short_name(uid)))

        entry = "Message at %s\nUser: %s (%s)\n%s" % (timestamp, author, author_full, text)

        if 'reactions' in msg:
            entry += "\nReactions: " + ', '.join(
                '%s (%s)' % (rxn['name'], ', '.join(short_name(uid) for uid in rxn.get('users', [])))
                for rxn in msg['reactions'])

        if 'files' in msg:
            entry += "\nFiles:\n" + '\n'.join(
                ' - %s, %s' % (f.get('name', '[unnamed file]'),
                               f.get('url_private_download', '[no download link]'))
                for f in msg['files'])

        entry += divider

        if check_thread and 'parent_user_id' in msg:
            entry = '\n'.join('\t%s' % line for line in entry.split('\n'))

        # drop the tab that indentation appended after the final newline
        entries.append(entry.rstrip('\t'))
    return ''.join(entries)
def parse_replies(threads, users):
    """Render a list of reply threads as text.

    Every thread is formatted by ``parse_channel_history`` with thread
    indentation enabled and followed by one extra newline; the formatted
    threads are concatenated in order.
    """
    rendered = (parse_channel_history(thread, users, check_thread=True) for thread in threads)
    return ''.join(chunk + '\n' for chunk in rendered)
if __name__ == '__main__':
    # Command-line entry point: list conversations/users and export
    # conversation history and reply threads, as text or raw JSON.
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', help="Directory in which to save output files (if left blank, prints to stdout)")
    parser.add_argument('--lc', action='store_true', help="List all conversations in your workspace")
    parser.add_argument('--lu', action='store_true', help="List all users in your workspace")
    parser.add_argument('--json', action='store_true', help="Give the requested output in raw JSON format (no parsing)")
    parser.add_argument('-c', action='store_true', help="Get history for all accessible conversations")
    parser.add_argument('-r', action='store_true', help="Get reply threads for all accessible conversations")
    a = parser.parse_args()

    # one timestamp per run, so all files of a run share an output directory
    ts = str(datetime.strftime(datetime.now(), '%m-%d-%Y_%H%M%S'))

    def save(data, filename):
        """Print ``data``, or write it to ``<-o>/slack_export_<ts>/<filename>``.

        The extension (``.json`` or ``.txt``) is chosen from ``--json``.
        """
        if a.o is None:
            # with --json, emit actual JSON rather than the Python repr
            print(json.dumps(data, indent=4) if a.json else data)
            return

        out_dir_parent = os.path.abspath(os.path.expanduser(os.path.expandvars(a.o)))
        out_dir = os.path.join(out_dir_parent, 'slack_export_%s' % ts)
        filename = filename + '.json' if a.json else filename + '.txt'
        os.makedirs(out_dir, exist_ok=True)
        full_filepath = os.path.join(out_dir, filename)
        print("Writing output to %s" % full_filepath)
        # explicit UTF-8 so emoji and non-ASCII text do not depend on the locale
        with open(full_filepath, mode='w', encoding='utf-8') as f:
            if a.json:
                json.dump(data, f, indent=4)
            else:
                f.write(data)

    def save_replies(channel_hist, channel_id, users, channels):
        """Fetch and save the reply threads of one conversation.

        ``channels`` is the conversation list used to resolve the header
        name; it is passed in explicitly because relying on module globals
        raised ``NameError`` when ``-r`` was used without ``-c``.
        """
        ch_replies = channel_replies([x['ts'] for x in channel_hist if 'reply_count' in x], channel_id)
        if a.json:
            data_replies = ch_replies
        else:
            ch_name, ch_type = name_from_ch_id(channel_id, channels)
            header_str = 'Threads in %s: %s\n%s Messages' % (ch_type, ch_name, len(ch_replies))
            data_replies = parse_replies(ch_replies, users)
            sep = '=' * 24
            data_replies = '%s\n%s\n\n%s' % (header_str, sep, data_replies)
        save(data_replies, 'channel-replies_%s' % channel_id)

    if a.lc:
        data = channel_list() if a.json else parse_channel_list(channel_list(), user_list())
        save(data, 'channel_list')

    if a.lu:
        data = user_list() if a.json else parse_user_list(user_list())
        save(data, 'user_list')

    if a.c or a.r:
        # fetch the conversation and user lists once, and each history once,
        # no matter which of -c / -r were requested
        ch_list = channel_list()
        users = user_list()
        for ch_id in [x['id'] for x in ch_list]:
            ch_hist = channel_history(ch_id)
            if a.c:
                if a.json:
                    data_ch = ch_hist
                else:
                    data_ch = parse_channel_history(ch_hist, users)
                    ch_name, ch_type = name_from_ch_id(ch_id, ch_list)
                    header_str = '%s Name: %s' % (ch_type, ch_name)
                    sep = '=' * 24
                    data_ch = 'Channel ID: %s\n%s\n%s Messages\n%s\n\n' % (ch_id, header_str, len(ch_hist), sep) + data_ch
                save(data_ch, 'channel_%s' % ch_id)
            if a.r:
                save_replies(ch_hist, ch_id, users, ch_list)

View file

@ -1,4 +1,3 @@
requests==2.24.0
Flask==1.1.2
python-dotenv==0.15.0
slackclient==2.9.3
Flask~=1.1.2
requests~=2.24.0
python-dotenv~=0.15.0