No, we can't just sort lines in the Slack archive
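Comments contain indices back to their parent. Reordering items after those indices have been assigned completely messes up the indices, so the converter now sorts items by timestamp itself, before assigning any indices.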
This commit is contained in:
parent
ef3881ab5a
commit
7d3f2722ff
|
@ -20,7 +20,6 @@
|
||||||
# cd .. # go back to the top-level archive directory
|
# cd .. # go back to the top-level archive directory
|
||||||
# dd if=/dev/zero of=data.img count=201600 # 100MB
|
# dd if=/dev/zero of=data.img count=201600 # 100MB
|
||||||
# python path/to/convert_slack.py > data.out 2> data.err
|
# python path/to/convert_slack.py > data.out 2> data.err
|
||||||
# (optionally sort items by timestamp; I currently do this in Vim by piping the latter half of data.out through `sort`)
|
|
||||||
# dd if=data.out of=data.img conv=notrunc
|
# dd if=data.out of=data.img conv=notrunc
|
||||||
# Currently this process yields errors for ~300 items (~70 posts and their comments)
|
# Currently this process yields errors for ~300 items (~70 posts and their comments)
|
||||||
# on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
|
# on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
|
||||||
|
@ -60,15 +59,19 @@ def parent(item):
|
||||||
else:
|
else:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
idx = 0
|
items = []
|
||||||
for channel in json.load(open('channels.json')):
|
for channel in json.load(open('channels.json')):
|
||||||
for filename in sorted(listdir(channel['name'])):
|
for filename in sorted(listdir(channel['name'])):
|
||||||
with open(join(channel['name'], filename)) as f:
|
with open(join(channel['name'], filename)) as f:
|
||||||
for item in json.load(f):
|
for item in json.load(f):
|
||||||
try:
|
item['channel_name'] = channel['name']
|
||||||
#? stderr.write(repr(item)+'\n')
|
items.append(item)
|
||||||
print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(channel['name'])} {by(item)} {json.dumps(item['text'])})")
|
|
||||||
item_idx[item['ts']] = idx
|
idx = 0
|
||||||
idx += 1 # only increment when actually used and no exception raised
|
for item in sorted(items, key=lambda item: item['ts']):
|
||||||
except KeyError:
|
try:
|
||||||
stderr.write(repr(item)+'\n')
|
print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(item['channel_name'])} {by(item)} {json.dumps(item['text'])})")
|
||||||
|
item_idx[item['ts']] = idx
|
||||||
|
idx += 1 # only increment when actually used and no exception raised
|
||||||
|
except KeyError:
|
||||||
|
stderr.write(repr(item)+'\n')
|
||||||
|
|
Loading…
Reference in New Issue