2021-08-07 04:01:38 +00:00
|
|
|
# Import JSON from a Slack admin export into a disk image Qemu can load.
|
|
|
|
#
|
|
|
|
# Dependencies: python, netpbm
|
2021-08-07 03:46:48 +00:00
|
|
|
#
|
2021-08-08 18:15:22 +00:00
|
|
|
# Step 1: download a Slack archive
|
|
|
|
#
|
|
|
|
# Step 2: download user avatars to subdirectory images/ and convert them to PPM in subdirectory images/ppm/
|
|
|
|
# mkdir images
|
|
|
|
# cd images
|
2021-08-07 04:01:38 +00:00
|
|
|
# grep image_72 . -r |grep -v users.json |awk '{print $3}' |sort |uniq |sed 's/?.*//' |sed 's,\\,,g' |sed 's/"//' |sed 's/",$//' > images.list
|
2021-08-07 03:46:48 +00:00
|
|
|
# wget -i images.list --wait=0.1
|
|
|
|
# # fix some lying images: files named .jpg that actually contain PNG data
|
|
|
|
# for f in $(file *.jpg |grep PNG |sed 's/:.*//'); do mv -i $f $(echo $f |sed 's/\.jpg$/.png/'); done
|
|
|
|
# #
|
|
|
|
# mkdir ppm
|
|
|
|
# for f in *.jpg; do jpegtopnm $f |pnmtopnm -plain > ppm/$(echo $f |sed 's/\.jpg$//').ppm; done
|
|
|
|
# for f in *.png; do png2pnm -n $f > ppm/$(echo $f |sed 's/\.png$//').ppm; done
|
|
|
|
#
|
2021-08-08 18:15:22 +00:00
|
|
|
# Step 3: construct a disk image out of the archives and avatars
|
|
|
|
# cd ../.. # go back to parent of images/
|
2021-08-07 04:01:38 +00:00
|
|
|
# dd if=/dev/zero of=data.img count=201600 # 100MB
|
2021-08-08 18:15:22 +00:00
|
|
|
# python path/to/convert_slack.py |dd of=data.img conv=notrunc
|
2021-08-07 04:01:38 +00:00
|
|
|
# Currently this process yields errors for ~70 items on the Future of Software
|
|
|
|
# group. We fail to load those.
|
2021-08-07 03:46:48 +00:00
|
|
|
#
|
|
|
|
# Notes on input format:
|
|
|
|
# Redundant 'type' field that's always 'message'. Probably an "enterprise" feature.
|
|
|
|
|
|
|
|
from sys import argv, stderr
|
|
|
|
import json
|
|
|
|
from os import listdir
|
|
|
|
from os.path import isfile, join, basename, splitext
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
|
|
def look_up_ppm_image(url):
    """Return the contents of the PPM file that corresponds to `url`.

    The file is looked up at images/ppm/<stem>.ppm (relative to the current
    directory), where <stem> is the last component of the URL's path with
    its extension stripped.  Any query string in the URL is ignored because
    only the parsed path is used.

    Returns the file's text, or None when no such file exists.
    """
    url_path = urlparse(url).path
    stem, _extension = splitext(basename(url_path))
    ppm_path = f"images/ppm/{stem}.ppm"
    if not isfile(ppm_path):
        return None
    with open(ppm_path) as ppm:
        return ppm.read()
|
|
|
|
|
2021-08-10 11:15:40 +00:00
|
|
|
# Emit one record per user, in users.json order, and remember each user's
# position so that messages can refer to their author by index.
#
# Each record printed to stdout has the form:
#   ("<id>" "@<name>" "<real name>" [<PPM text of the avatar, or nothing>])
user_id = {}  # user['id'] -> index of that user within users.json
# JSON is UTF-8; decode it explicitly rather than relying on the locale's
# default encoding, which would garble or reject non-ASCII real names.
with open('users.json', encoding='utf-8') as f:
    for idx, user in enumerate(json.load(f)):
        uid = user['id']
        handle = user['name']
        # Some users have no 'real_name'; emit an empty string for them.
        real_name = user.get('real_name', '')
        # look_up_ppm_image returns None when no converted avatar exists.
        avatar = look_up_ppm_image(user['profile']['image_72']) or ''
        print(f"({json.dumps(uid)} \"@{handle}\" {json.dumps(real_name)} [{avatar}])")
        user_id[uid] = idx

# NOTE(review): nothing in the visible part of the file reads `items`;
# kept in case later code depends on it.
items = []
|
|
|
|
|
2021-08-10 11:11:51 +00:00
|
|
|
def contents(filename):
    """Yield one record for each loadable message in a JSON file.

    `filename` must contain a JSON array of message objects.  For each
    message a dict with three keys is yielded:

      'name'     -- "/<thread_ts>/<ts>" when the message has a 'thread_ts'
                    field (a comment), otherwise "/<ts>" (a top-level post)
      'contents' -- the message's 'text'
      'by'       -- the author's index, looked up in the module-level
                    `user_id` mapping via the message's 'user' field

    A message that lacks 'ts', 'text' or 'user', or whose user is not in
    `user_id`, is not yielded; its repr is written to stderr instead.
    """
    # JSON is UTF-8, so decode it explicitly instead of trusting the locale.
    # Load everything up front so the file is closed before the first yield,
    # rather than staying open for as long as the consumer keeps the
    # generator alive.
    with open(filename, encoding='utf-8') as f:
        messages = json.load(f)

    for item in messages:
        try:
            if 'thread_ts' in item:
                # comment
                name = f"/{item['thread_ts']}/{item['ts']}"
            else:
                # top-level post
                name = f"/{item['ts']}"
            record = {
                'name': name,
                'contents': item['text'],
                'by': user_id[item['user']],
            }
        except KeyError:
            # Report the message we could not convert and move on.
            stderr.write(repr(item)+'\n')
            continue
        # Yield outside the try so that only lookups on the message itself
        # are covered by the KeyError handler.
        yield record
|
|
|
|
|
2021-08-10 11:28:44 +00:00
|
|
|
# Emit one record per message, channel by channel.  Each record printed to
# stdout has the form:
#   ("<item name>" "<channel name>" <author index> "<message text>")
#
# Read the channel list inside a `with` so the file is closed promptly, and
# decode it as UTF-8 explicitly since JSON is UTF-8 regardless of locale.
with open('channels.json', encoding='utf-8') as f:
    channels = json.load(f)

for channel in channels:
    # Each channel's messages live in a directory named after the channel.
    # listdir() returns entries in arbitrary order; sorting makes the output
    # order deterministic.
    for filename in sorted(listdir(channel['name'])):
        for item in contents(join(channel['name'], filename)):
            print(f"({json.dumps(item['name'])} {json.dumps(channel['name'])} {item['by']} {json.dumps(item['contents'])})")
|