# Import JSON from a Slack admin export into a disk image Mu can load.
#
# Dependencies: python, wget, awk, sed, netpbm
#
# Step 1: download a Slack archive and unpack it to some directory
# Step 2: download user avatars to subdirectory images/ and convert them to PPM in subdirectory images/ppm/
#   grep image_72 . -r |grep -v users.json |awk '{print $3}' |sort |uniq |sed 's/?.*//' |sed 's,\\,,g' |sed 's/"//' |sed 's/",$//' > images.list
#   mkdir images
#   cd images
#   wget -i ../images.list --wait=0.1
#   # fix some lying images
#   for f in $(file *.jpg |grep PNG |sed 's/:.*//'); do mv -i $f $(echo $f |sed 's/\.jpg$/.png/'); done
#   mkdir ppm
#   for f in *.jpg; do jpegtopnm $f |pnmtoplainpnm > ppm/$(echo $f |sed 's/\.jpg$//').ppm; done
#   for f in *.png; do pngtopnm $f |pnmtoplainpnm > ppm/$(echo $f |sed 's/\.png$//').ppm; done
#
# (Depending on your OS, you may need to replace pnmtoplainpnm with `pnmtopnm -plain`. Some places also have a pnm2pnm.
# I don't understand it either.)
# Step 3: construct a disk image out of the archives and avatars
#   cd ..  # go back to the top-level archive directory
#   dd if=/dev/zero of=data.img count=201600  # 100MB
#   python path/to/convert_slack.py > data.out 2> data.err
#   dd if=data.out of=data.img conv=notrunc
# Currently this process yields errors for ~300 items (~70 posts and their comments)
# on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
# Notes on input format:
#   Redundant 'type' field that's always 'message'. Probably an "enterprise" feature.
import json
import traceback
from os import listdir
from os.path import basename, isfile, join, splitext
from sys import argv, stderr
from urllib.parse import urlparse
def look_up_ppm_image(url):
    """Return the text of the converted PPM avatar that `url` refers to.

    The file is located by the basename of the URL's path, with the query
    string and the extension dropped, under images/ppm/ relative to the
    current working directory.

    Returns the file's contents as a string, or None when no such file
    exists (the caller in this script substitutes '' for None).
    """
    file_root = splitext(basename(urlparse(url).path))[0]
    filename = f"images/ppm/{file_root}.ppm"
    # A missing avatar is not an error: report "no image" instead of raising.
    if not isfile(filename):
        return None
    with open(filename) as f:
        return f.read()
# Maps each Slack user id to the position of that user's record in the
# output, i.e. its index in users.json.
user_idx = {}
with open('users.json') as f:
    for idx, user in enumerate(json.load(f)):
        # Some users have no 'real_name'; emit an empty string for them.
        user.setdefault('real_name', '')
        # One record per user: id, "@name", real name, and the avatar's PPM
        # text in brackets (empty brackets when no converted avatar exists).
        print(f"({json.dumps(user['id'])} \"@{user['name']}\" {json.dumps(user['real_name'])} [{look_up_ppm_image(user['profile']['image_72']) or ''}])")
        user_idx[user['id']] = idx
def by(item):
    """Return the user index of the author of `item`.

    A bot message that carries a 'username' is attributed to that name; a
    name with no entry in `user_idx` yet is assigned the next free index
    (the current size of `user_idx`) the first time it is seen. Every other
    item is looked up by its 'user' id.

    Raises KeyError when a non-bot item has no 'user' field or its user is
    not in `user_idx`.
    """
    if item.get('subtype') == 'bot_message' and 'username' in item:
        federated_user = item['username']
        if federated_user not in user_idx:
            user_idx[federated_user] = len(user_idx)
        return user_idx[federated_user]
    return user_idx[item['user']]
# Maps the 'ts' of each item already written to the output to its index there.
item_idx = {}


def parent(item):
    """Return the output index of the item that `item` replies to.

    An item is a reply when it has a 'thread_ts' that differs from its own
    'ts'; the parent is then the item whose 'ts' equals that 'thread_ts'.

    Raises KeyError when the parent has not been recorded in `item_idx`.
    """
    if 'thread_ts' in item and item['thread_ts'] != item['ts']:
        return item_idx[item['thread_ts']]
    # NOTE(review): the non-reply branch is missing from this listing. -1 is
    # assumed as the "no parent" value because real indices start at 0;
    # confirm against the loader that reads the disk image.
    return -1
# Every item from every channel, each tagged with the name of its channel.
items = []
# Read the channel list inside a `with` so the file handle is closed promptly.
with open('channels.json') as channels_file:
    channels = json.load(channels_file)
for channel in channels:
    # Each channel is a directory, named after the channel, of JSON files.
    for filename in sorted(listdir(channel['name'])):
        with open(join(channel['name'], filename)) as f:
            for item in json.load(f):
                item['channel_name'] = channel['name']
                items.append(item)
# Index the next successfully written item will get; replies refer to their
# parent by this number.
idx = 0
for item in sorted(items, key=lambda item: item['ts']):
    try:
        # One record per item: timestamp, parent index, channel name, author
        # index, and message text.
        print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(item['channel_name'])} {by(item)} {json.dumps(item['text'])})")
        item_idx[item['ts']] = idx
        idx += 1  # only increment when actually used and no exception raised
    # NOTE(review): the handler's exception type is missing from this listing.
    # KeyError is assumed because every failure visible in the try body is a
    # dict lookup (missing 'text'/'user', or an unrecorded parent); confirm.
    except KeyError:
        # Report the failing item on stderr and carry on with the rest.
        traceback.print_exc(file=stderr)
        stderr.write(repr(item)+'\n')