no, we can't just sort lines in the slack archive

Comments contain indices back to the parent. Reordering items completely messes up the indices.
2021-08-14 19:56:09 -07:00 · 2021-08-14 19:56:09 -07:00 · 7d3f2722ff
parent ef3881ab5a
commit 7d3f2722ff
1 changed file with 12 additions and 9 deletions
--- a/browse-slack/convert_slack.py
+++ b/browse-slack/convert_slack.py
@@ -20,7 +20,6 @@
 #   cd ..  # go back to the top-level archive directory
 #   dd if=/dev/zero of=data.img count=201600  # 100MB
 #   python path/to/convert_slack.py > data.out 2> data.err
-#   (optionally sort items by timestamp; I currently do this in Vim by piping the latter half of data.out through `sort`)
 #   dd if=data.out of=data.img conv=notrunc
 # Currently this process yields errors for ~300 items (~70 posts and their comments)
 # on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
@@ -60,15 +59,19 @@ def parent(item):
    else:
        return -1

-idx = 0
+items = []
 for channel in json.load(open('channels.json')):
    for filename in sorted(listdir(channel['name'])):
        with open(join(channel['name'], filename)) as f:
            for item in json.load(f):
-                try:
-#?                     stderr.write(repr(item)+'\n')
-                    print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(channel['name'])} {by(item)} {json.dumps(item['text'])})")
-                    item_idx[item['ts']] = idx
-                    idx += 1  # only increment when actually used and no exception raised
-                except KeyError:
-                    stderr.write(repr(item)+'\n')
+                item['channel_name'] = channel['name']
+                items.append(item)
+
+idx = 0
+for item in sorted(items, key=lambda item: item['ts']):
+    try:
+        print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(item['channel_name'])} {by(item)} {json.dumps(item['text'])})")
+        item_idx[item['ts']] = idx
+        idx += 1  # only increment when actually used and no exception raised
+    except KeyError:
+        stderr.write(repr(item)+'\n')