exporter-ng
This commit is contained in:
parent
6124b58289
commit
2a8ea1b8f4
1 changed files with 479 additions and 0 deletions
479
exporter-ng.py
Normal file
479
exporter-ng.py
Normal file
|
@ -0,0 +1,479 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from timeit import default_timer
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from pathvalidate import sanitize_filename
|
||||||
|
from time import sleep
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Optional, Dict, Any, Iterator, Tuple
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
@dataclass
class SlackConfig:
    """Settings for the Slack API client."""

    # Token placed in the "Authorization: Bearer ..." header of API requests.
    user_token: str
    # Extra seconds added on top of the server-provided Retry-After delay.
    additional_sleep_time: int = 2

    @classmethod
    def from_env(cls, env_path: str = ".env") -> 'SlackConfig':
        """Create a config from the ``SLACK_USER_TOKEN`` environment variable.

        If a dotenv file named ``env_path`` exists next to this script it is
        loaded first, so the token may come from that file.

        Raises:
            ValueError: if ``SLACK_USER_TOKEN`` is not set.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        dotenv_file = os.path.join(script_dir, env_path)
        if os.path.isfile(dotenv_file):
            load_dotenv(dotenv_file)

        try:
            token = os.environ["SLACK_USER_TOKEN"]
        except KeyError:
            raise ValueError("Brak SLACK_USER_TOKEN w zmiennych środowiskowych")
        return cls(token)
|
||||||
|
|
||||||
|
class SlackAPI:
    """Small client for the Slack Web API with rate-limit and cursor handling."""

    def __init__(self, config: "SlackConfig"):
        """Store the config and prepare the bearer-token header and base URL."""
        self.config = config
        self.headers = {"Authorization": f"Bearer {config.user_token}"}
        self.base_url = "https://slack.com/api"

    def get_data(self, endpoint: str, params: dict) -> "requests.Response":
        """Issue a GET request, retrying for as long as the server answers 429.

        Args:
            endpoint: API method name appended to the base URL.
            params: Query parameters sent with the request.

        Returns:
            The first response whose status code is not 429. Other error
            statuses are returned as-is for the caller to inspect.
        """
        url = f"{self.base_url}/{endpoint}"
        attempt = 0

        while True:
            response = requests.get(url, headers=self.headers, params=params)
            attempt += 1

            if response.status_code != 429:
                return response

            # A 429 without a usable Retry-After header must not crash the
            # export; fall back to a one second base delay.
            try:
                retry_after = int(response.headers.get("Retry-After", 1))
            except (TypeError, ValueError):
                retry_after = 1

            sleep_time = retry_after + self.config.additional_sleep_time
            print(f"Limit zapytań przekroczony. Ponowna próba za {sleep_time} sekund (próba {attempt}).")
            sleep(sleep_time)

    def paginated_get(self, endpoint: str, params: dict, combine_key: Optional[str] = None) -> List[Dict]:
        """Fetch every page of a cursor-paginated endpoint.

        Args:
            endpoint: API method name.
            params: Base query parameters. The dict is copied, so the caller's
                object is never modified.
            combine_key: Key of the list to collect from each page. When
                omitted, each page's whole JSON payload is appended instead.

        Returns:
            The collected items (or page payloads) in the order received.

        Raises:
            Exception: on a non-200 HTTP status or a payload without
                ``ok: true``.
            KeyError: if ``combine_key`` is missing from a page.
        """
        # Work on a copy: the cursor is request-local state.
        query = dict(params)
        result = []

        while True:
            response = self.get_data(endpoint, query)

            if response.status_code != 200:
                raise Exception(f"Błąd API: {response.status_code} {response.reason}")

            data = response.json()

            if not data.get("ok", False):
                raise Exception(f"Błąd Slack API: {data}")

            if combine_key:
                result.extend(data[combine_key])
            else:
                # Extending with a dict would add its key strings only.
                result.append(data)

            metadata = data.get("response_metadata") or {}
            next_cursor = (metadata.get("next_cursor") or "").strip()
            if not next_cursor:
                return result
            query["cursor"] = next_cursor

    def get_users(self, team_id: Optional[str] = None) -> List[Dict]:
        """Return all members from ``users.list``, optionally for one team."""
        params = {"limit": 200}
        if team_id:
            params["team_id"] = team_id
        return self.paginated_get("users.list", params, "members")

    def get_channels(self, team_id: Optional[str] = None) -> List[Dict]:
        """Return all conversations (public, private, mpim, im) from ``conversations.list``."""
        params = {
            "types": "public_channel,private_channel,mpim,im",
            "limit": 200,
        }
        if team_id:
            params["team_id"] = team_id
        return self.paginated_get("conversations.list", params, "channels")

    def get_channel_history(self, channel_id: str, oldest: Optional[str] = None,
                            latest: Optional[str] = None) -> List[Dict]:
        """Return the messages of a conversation from ``conversations.history``.

        ``oldest`` and ``latest`` are forwarded to the API only when given.
        """
        params = {
            "channel": channel_id,
            "limit": 200,
        }
        if oldest:
            params["oldest"] = oldest
        if latest:
            params["latest"] = latest
        return self.paginated_get("conversations.history", params, "messages")

    def get_replies(self, channel_id: str, thread_ts: str) -> List[Dict]:
        """Return the messages of one thread from ``conversations.replies``."""
        params = {
            "channel": channel_id,
            "ts": thread_ts,
            "limit": 200,
        }
        return self.paginated_get("conversations.replies", params, "messages")

    def get_files(self, channel: Optional[str] = None) -> List[Dict]:
        """Return file records from ``files.list``, optionally for one channel.

        NOTE(review): ``files.list`` appears to use page/count paging rather
        than cursors; if so only the first page is returned here. Confirm
        against the Slack API reference.
        """
        params = {}
        if channel:
            params["channel"] = channel
        return self.paginated_get("files.list", params, "files")
|
||||||
|
|
||||||
|
@dataclass
class SlackUser:
    """A Slack user reduced to the fields this exporter prints."""

    id: str
    name: str
    real_name: Optional[str] = None
    display_name: Optional[str] = None
    is_admin: bool = False
    is_owner: bool = False
    is_primary_owner: bool = False
    is_restricted: bool = False
    is_ultra_restricted: bool = False
    is_bot: bool = False
    is_app_user: bool = False
    tz: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict) -> 'SlackUser':
        """Build a user from an API user record.

        ``real_name`` and ``display_name`` are read from the nested
        ``profile`` mapping; every other field comes from the top level.
        Only ``id`` is mandatory.
        """
        profile = data.get("profile", {})
        flag_names = (
            "is_admin", "is_owner", "is_primary_owner", "is_restricted",
            "is_ultra_restricted", "is_bot", "is_app_user",
        )
        flags = {flag: data.get(flag, False) for flag in flag_names}
        return cls(
            id=data["id"],
            name=data.get("name", ""),
            real_name=profile.get("real_name"),
            display_name=profile.get("display_name"),
            tz=data.get("tz"),
            **flags,
        )

    def get_display_name(self) -> str:
        """Return the first non-empty of real name, display name and name."""
        for candidate in (self.real_name, self.display_name, self.name):
            if candidate:
                return candidate
        return "[brak nazwy]"

    def format(self) -> str:
        """Render the user as one comma-separated line.

        Layout: ``[id] name``, then the real name in parentheses, the time
        zone, and the set flags (without the ``is_`` prefix) joined by ``|``;
        empty parts are left out.
        """
        pieces = [f"[{self.id}] {self.name}"]

        if self.real_name:
            pieces.append(f"({self.real_name})")

        if self.tz:
            pieces.append(self.tz)

        flag_attrs = ('is_admin', 'is_owner', 'is_primary_owner', 'is_restricted',
                      'is_ultra_restricted', 'is_bot', 'is_app_user')
        # Slice off the leading "is_" to get the role label.
        roles = [attr[3:] for attr in flag_attrs if getattr(self, attr)]
        if roles:
            pieces.append("|".join(roles))

        return ", ".join(pieces)
|
||||||
|
|
||||||
|
class SlackChannel:
    """A Slack conversation reduced to the fields this exporter prints."""

    def __init__(self, data: Dict):
        """Copy the relevant keys from an API conversation record.

        Only ``id`` is mandatory; missing flags default to ``False`` and a
        missing ``creator``/``user`` to ``None``.
        """
        self.id = data["id"]
        self.name = data.get("name", "")
        # Attribute order is kept stable because instances are dumped with
        # vars() when exporting as JSON.
        for flag in ("is_private", "is_im", "is_mpim", "is_group"):
            setattr(self, flag, data.get(flag, False))
        self.creator_id = data.get("creator")
        self.user_id = data.get("user")

    @property
    def type(self) -> str:
        """Conversation kind; the im flag wins over mpim, then group."""
        if self.is_im:
            return "direct_message"
        if self.is_mpim:
            return "multiparty-direct_message"
        return "group" if self.is_group else "channel"

    def format(self, users: Dict[str, SlackUser]) -> str:
        """Render the channel as one space-separated line.

        Args:
            users: Known users by id, used to resolve the creator (or, failing
                that, the ``user`` of the conversation) to a name.
        """
        segments = [f"[{self.id}]"]

        if self.name:
            segments.append(f" {self.name}:")

        if self.is_private:
            segments.append("private")

        segments.append(self.type)

        creator_known = bool(self.creator_id) and self.creator_id in users
        if creator_known:
            segments.append(f"created by {users[self.creator_id].name}")
        elif self.user_id and self.user_id in users:
            segments.append(f"with {users[self.user_id].name}")

        return " ".join(segments)
|
||||||
|
|
||||||
|
@dataclass
class SlackFile:
    """A Slack file record: id, display name and private URL."""

    id: str
    name: str
    url_private: str

    @classmethod
    def from_dict(cls, data: Dict) -> 'SlackFile':
        """Build a file from an API record.

        ``id`` is mandatory; ``name`` and ``url_private`` default to ``""``.
        """
        file_id = data["id"]
        file_name = data.get("name", "")
        private_url = data.get("url_private", "")
        return cls(file_id, file_name, private_url)
|
||||||
|
|
||||||
|
class SlackMessage:
    """A single Slack message plus the user table needed to render it."""

    def __init__(self, data: Dict, users: Dict[str, SlackUser]):
        """Extract the printed fields from an API message record.

        Args:
            data: Message record; ``ts`` is required, everything else optional.
            users: Known users by id, kept for name lookups while formatting.
        """
        self.timestamp = float(data["ts"])
        self.text = data.get("text", "[no message content]")
        self.user_id = data.get("user")
        self.reactions = data.get("reactions", [])
        self.files = data.get("files", [])
        self.users = users
        # A message counts as a thread parent when it carries a reply count.
        self.has_thread = "reply_count" in data
        self.parent_user_id = data.get("parent_user_id")

    def format(self, indent: bool = False) -> str:
        """Render the message as a text block ending in a separator row.

        The block holds the time (``datetime.fromtimestamp`` of the rounded
        timestamp), the author, the text with ``<@id>`` mentions annotated
        with user names, and optional reaction and file listings.

        Args:
            indent: When true, every line of the block is prefixed with a tab.
        """
        stamp = datetime.fromtimestamp(round(self.timestamp)).strftime("%Y-%m-%d %H:%M:%S")

        author = self.users.get(self.user_id)
        if author:
            author_label = f"{author.name} ({author.get_display_name()})"
        else:
            author_label = "none"

        # Annotate every mention of a known user with that user's name.
        body = self.text
        for uid, member in self.users.items():
            body = body.replace(f"<@{uid}>", f"<@{uid}> ({member.name})")

        output = [f"Message at {stamp}", f"User: {author_label}", body]

        if self.reactions:
            def describe(reaction):
                # Only reacting users present in the user table are listed.
                names = [self.users[uid].name for uid in reaction["users"] if uid in self.users]
                return f"{reaction['name']} ({', '.join(names)})"

            output.append("Reactions: " + ", ".join(describe(r) for r in self.reactions))

        if self.files:
            output.append("Files:")
            for attachment in self.files:
                downloadable = "name" in attachment and "url_private_download" in attachment
                if downloadable:
                    output.append(f" - [{attachment['id']}] {attachment['name']}, {attachment['url_private_download']}")
                else:
                    output.append(f" - [{attachment['id']}] [deleted, oversize, or unavailable file]")

        rendered = "\n".join(output)
        if indent:
            # Prefixing the start and every newline with a tab indents each line.
            rendered = "\t" + rendered.replace("\n", "\n\t")

        return f"{rendered}\n\n{'*' * 24}\n\n"
|
||||||
|
|
||||||
|
class SlackExporter:
    """Main exporter: fetches workspace data and writes it to an output folder."""

    def __init__(self, config: SlackConfig, output_dir: str):
        """Create the API client and output folder, then load users and channels.

        Args:
            config: API configuration holding the token.
            output_dir: Parent directory in which a timestamped
                ``slack_export_...`` folder is created.
        """
        self.api = SlackAPI(config)
        self.timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
        self.output_dir = self._set_output_dir(output_dir)
        self.users: Dict[str, SlackUser] = self._load_users()
        self.channels: List[SlackChannel] = self._load_channels()

    def _set_output_dir(self, path: str) -> str:
        """Create and return ``<path>/slack_export_<timestamp>``.

        ``path`` may contain ``~`` and environment variables.
        """
        parent_dir = os.path.abspath(os.path.expanduser(os.path.expandvars(path)))
        output_dir = os.path.join(parent_dir, f"slack_export_{self.timestamp}")
        os.makedirs(output_dir, exist_ok=True)
        return output_dir

    def _load_users(self) -> Dict[str, SlackUser]:
        """Fetch all users and index them by user id."""
        users_data = self.api.get_users()
        return {u["id"]: SlackUser.from_dict(u) for u in users_data}

    def _load_channels(self) -> List[SlackChannel]:
        """Fetch all conversations visible to the token."""
        channels_data = self.api.get_channels()
        return [SlackChannel(ch) for ch in channels_data]

    def _find_channel(self, channel_id: str) -> Optional[SlackChannel]:
        """Return the loaded channel with this id, or ``None`` if unknown."""
        return next((ch for ch in self.channels if ch.id == channel_id), None)

    def _save_data(self, data: Any, filename: str, as_json: bool = False):
        """Write ``data`` to ``<output_dir>/<filename>.json`` or ``.txt``.

        With no output directory set, the data is dumped to stdout as JSON.

        Args:
            data: JSON-serialisable object when ``as_json`` is true,
                otherwise a string.
            filename: File name without extension.
            as_json: Selects the JSON or plain-text format and extension.
        """
        if not self.output_dir:
            json.dump(data, sys.stdout, indent=4)
            return

        ext = "json" if as_json else "txt"
        filepath = os.path.join(self.output_dir, f"{filename}.{ext}")

        print(f"Zapisywanie do {filepath}")
        with open(filepath, mode="w", encoding="utf-8") as f:
            if as_json:
                json.dump(data, f, indent=4)
            else:
                f.write(data)

    def export_channel_list(self, as_json: bool = False):
        """Save the channel list as ``channel_list``."""
        if as_json:
            data = [vars(ch) for ch in self.channels]
        else:
            data = "\n".join(ch.format(self.users) for ch in self.channels)
        self._save_data(data, "channel_list", as_json)

    def export_user_list(self, as_json: bool = False):
        """Save the user list as ``user_list``."""
        if as_json:
            data = [vars(u) for u in self.users.values()]
        else:
            data = "\n".join(u.format() for u in self.users.values())
        self._save_data(data, "user_list", as_json)

    def export_channel_history(self, channel_id: str, oldest: Optional[str] = None,
                               latest: Optional[str] = None, as_json: bool = False):
        """Save a channel's messages as ``channel_<id>``.

        Args:
            channel_id: Conversation to export.
            oldest: Optional lower timestamp bound passed to the API.
            latest: Optional upper timestamp bound passed to the API.
            as_json: Save the raw API records instead of formatted text.
        """
        history = self.api.get_channel_history(channel_id, oldest, latest)

        if as_json:
            data = history
        else:
            messages = [SlackMessage(msg, self.users) for msg in history]
            channel = self._find_channel(channel_id)

            header = f"Channel ID: {channel_id}\n"
            if channel:
                header += f"{channel.type.title()} Name: {channel.name}\n"
            header += f"{len(messages)} Messages\n{'*' * 24}\n\n"

            data = header + "".join(msg.format() for msg in messages)

        self._save_data(data, f"channel_{channel_id}", as_json)

    def export_channel_replies(self, channel_id: str, oldest: Optional[str] = None,
                               latest: Optional[str] = None, as_json: bool = False):
        """Save all thread messages of a channel as ``channel-replies_<id>``.

        Threads are found by scanning the channel history for messages that
        carry a ``reply_count``; each thread is then fetched separately.
        """
        history = self.api.get_channel_history(channel_id, oldest, latest)
        thread_messages = [msg for msg in history if "reply_count" in msg]

        all_replies = []
        for msg in thread_messages:
            all_replies.extend(self.api.get_replies(channel_id, msg["ts"]))

        if as_json:
            data = all_replies
        else:
            messages = [SlackMessage(msg, self.users) for msg in all_replies]
            channel = self._find_channel(channel_id)

            header = f"Threads in {channel.type if channel else 'channel'}: "
            header += f"{channel.name if channel else channel_id}\n"
            header += f"{len(messages)} Messages\n{'*' * 24}\n\n"

            data = header + "".join(msg.format(True) for msg in messages)

        self._save_data(data, f"channel-replies_{channel_id}", as_json)

    def export_channel_files(self, channel_id: Optional[str] = None):
        """Download every file listed by the API, optionally for one channel.

        Files are saved as ``<file id>-<sanitised name>`` in the output folder.
        """
        files = [SlackFile.from_dict(f) for f in self.api.get_files(channel_id)]
        for file in files:
            filename = f"{file.id}-{sanitize_filename(file.name)}"
            self.download_file(filename, file.url_private)

    def download_file(self, filename: str, url: str, attempts: int = 10) -> bool:
        """Download ``url`` into the output folder, retrying on request errors.

        Args:
            filename: Target file name inside the output folder.
            url: Private file URL; fetched with the API bearer token.
            attempts: Maximum number of download attempts.

        Returns:
            ``True`` if the file already exists or was downloaded completely,
            ``False`` if there was nothing to fetch or every attempt failed.
        """
        if attempts <= 0:
            return False

        target = os.path.join(self.output_dir, filename)
        if os.path.exists(target):
            return True

        if not url:
            # Nothing to fetch; retrying an empty URL can never succeed.
            print(f"Error downloading file {filename}: no download URL available.")
            return False

        # Download to a side file so an interrupted transfer is never mistaken
        # for a finished one by the existence check above.
        partial = f"{target}.part"
        for remaining in range(attempts - 1, -1, -1):
            try:
                # Private file URLs need the same bearer token as API calls.
                with requests.get(url, headers=self.api.headers, stream=True) as response:
                    response.raise_for_status()
                    with open(partial, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial, target)
                return True
            except requests.exceptions.RequestException as e:
                if os.path.exists(partial):
                    os.remove(partial)
                print(f"Error downloading file {filename}: {e}. {remaining} attempts left.")

        return False
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: parse options and run the requested exports.

    Exits with status 1 when ``--files`` is given without ``-o`` or when any
    export step raises.
    """
    parser = argparse.ArgumentParser(description="Eksporter danych ze Slacka")
    # The exporter always writes into a folder, so without -o the export
    # lands in the current directory.
    parser.add_argument("-o", help="Katalog wyjściowy (domyślnie bieżący katalog)")
    parser.add_argument("--lc", action="store_true", help="Lista wszystkich konwersacji")
    parser.add_argument("--lu", action="store_true", help="Lista wszystkich użytkowników")
    parser.add_argument("--json", action="store_true", help="Wynik w formacie JSON")
    parser.add_argument("-c", action="store_true", help="Historia wszystkich dostępnych konwersacji")
    parser.add_argument("--ch", help="Z -c, ogranicza eksport do podanego ID kanału")
    parser.add_argument("--fr", help="Z -c, timestamp początku zakresu (Unix)")
    parser.add_argument("--to", help="Z -c, timestamp końca zakresu (Unix)")
    parser.add_argument("-r", action="store_true", help="Pobierz wątki ze wszystkich konwersacji")
    parser.add_argument("--files", action="store_true", help="Pobierz wszystkie pliki")

    args = parser.parse_args()

    if args.files and not args.o:
        print("Opcja --files wymaga określenia katalogu wyjściowego (-o)")
        sys.exit(1)

    try:
        config = SlackConfig.from_env()
        # The constructor creates the output folder and loads users and
        # channels itself, so no further setup calls are needed.
        exporter = SlackExporter(config, args.o or ".")

        if args.lc:
            exporter.export_channel_list(args.json)

        if args.lu:
            exporter.export_user_list(args.json)

        if args.c or args.r:
            channel_ids = [args.ch] if args.ch else [ch.id for ch in exporter.channels]
            for channel_id in channel_ids:
                if args.c:
                    exporter.export_channel_history(channel_id, args.fr, args.to, args.json)
                if args.r:
                    exporter.export_channel_replies(channel_id, args.fr, args.to, args.json)

        if args.files:
            # TODO: implement file download from the command line
            print("Funkcja pobierania plików jeszcze nie zaimplementowana")

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Błąd: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue