Skip to content

Commit

Permalink
simulate forum post list, support voice text channels
Browse files Browse the repository at this point in the history
  • Loading branch information
slatinsky committed Oct 27, 2022
1 parent 3dbe903 commit 019f3dc
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 22 deletions.
53 changes: 52 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ View your JSON [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExport
- Message deduplication - merge multiple JSON exports and view them as if they were one
- Advanced message lazy loading and grouping (infinite scroll without pagination) - even channels with 100k+ messages are loaded almost instantly
- Threads support (go to thread, go back to channel where thread was created)
- Forums support (view forum posts as if they were threads)
- Guild search with autocomplete and filters
- View media files locally
- Browse guild or direct messages
Expand Down Expand Up @@ -99,6 +100,57 @@ Don't know how to get THREAD_IDs? Handy backup helper is included to extend your

![](docs/backup_helper.png)

## How to view forums
Viewing forums is supported by this viewer, but exporting them with DiscordChatExporter is harder than with other channel types, because exporting the main forum channel itself is not supported.

The workaround is to export the individual forum threads (posts). I made a script to collect the forum post IDs automatically:

### Steps
1. Open Discord in a browser
2. Navigate to the channel with the forum post list
3. Press F12 and paste this script into the console:

```js
// Shared state for the script. The names are assigned without let/const/var,
// so in a (non-strict) browser console they become globals.
// len  - number of unique IDs reported so far
// ids  - collected values of the data-item-id attributes
// previouseScrollTop - scrollTop observed on the previous tick, used to detect the end of the list
len = 0
ids = []
previouseScrollTop = 0

// Look up the scroller element (stored in the global `scrollDiv`) and scroll it
// to the given vertical offset.
function scrollToPosition(offset) {
scrollDiv = document.querySelector('div[class*="chat-"] > div > div > div[class*="scrollerBase-"]')
scrollDiv.scroll(0, offset)
}

// Collect data-item-id from every matching element currently in the DOM,
// deduplicate the list, and log the new total whenever it grows.
function captureIds() {
document.querySelectorAll('div[data-item-id]').forEach((e) => ids.push(e.dataset.itemId))
ids = [...new Set(ids)] //deduplicate
if (ids.length > len) {
len = ids.length
console.log('Found', len, 'IDs')
}
}

// Log all collected IDs as one comma-separated string.
function printIds() {
// print all ids, comma separated
console.log('found IDs:',ids.join(','))
}

// Start from the top, then every 1542 ms scroll down by a third of the window height.
scrollToPosition(0)
interval = setInterval(() => {
scrollToPosition(scrollDiv.scrollTop + window.innerHeight / 3)
// Wait 1000 ms after scrolling before capturing the IDs
// (presumably to give the page time to render newly visible items).
setTimeout(() => {
captureIds()
// scrollTop did not change since the previous tick: stop the interval and print the result.
if (previouseScrollTop === scrollDiv.scrollTop) {
clearInterval(interval)
printIds()
}
previouseScrollTop = scrollDiv.scrollTop
}, 1000)
}, 1542)
```

4. The script will scroll the page. At the end, it will print all IDs to the console
5. Download each ID with DiscordChatExporter the same way you would download a channel (--channel FORUM_POST_ID)

# Development
You don't need to follow development steps if you don't need to modify the code.

Expand Down Expand Up @@ -239,7 +291,6 @@ But should work on any Windows 10 / Windows 11 x64 computer.
- Linux support (docker?)
- Improve code readability
- online mode - view media files directly from Discord servers
- Discord forums support - waiting for DiscordChatExporter export support

## Why this tool was made
[DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter) is a well-made tool for exporting Discord chats. But to actually view them, you had to download them in HTML format, which is more inconvenient to parse than JSON. And if you wanted to extend your backup, it would be broken into multiple files, which is not very convenient.
Expand Down
97 changes: 76 additions & 21 deletions preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,68 @@ def read_channels_messages_from_files(self):
messages[message['id']] = message
return channels, messages

def simulate_thread_creation(self, channels, messages):
    """
    Insert a synthetic "ThreadCreated" message for every exported thread that lacks one.

    Sometimes threads are exported, but the channel they were created in is not.
    Because forum exports are not supported by DiscordChatExporter, this is always
    the case for forum posts (internally, forum posts are threads).

    Args:
        channels: dict of channel dicts; each is read for 'id', 'type', 'name'
            and 'categoryId'.
        messages: dict of message dicts keyed by message id; each is read for
            'type', 'timestamp', 'channelId', 'author' and (for "ThreadCreated"
            messages) 'reference'. Mutated in place.

    Returns:
        The same (channels, messages) objects. For each "GuildPublicThread"
        channel that has at least one message and no "ThreadCreated" message
        referencing it, `messages` gains a fake "ThreadCreated" entry stored
        under the thread id. Threads without any exported message are skipped,
        because there is no first message to take the timestamp and author from.
    """

    # thread ids that already have a real "ThreadCreated" message pointing at them
    announced_thread_ids = set()
    first_messages_in_channels = {}
    channel_msg_count = {}

    for message in messages.values():
        if message['type'] == "ThreadCreated":
            # 'reference' may be absent or None; treat that as "points to no thread"
            reference = message.get('reference') or {}
            announced_thread_ids.add(reference.get('channelId'))

        channel_id = message['channelId']
        earliest = first_messages_in_channels.get(channel_id)
        # strict '<' keeps the first message seen when timestamps are equal
        if earliest is None or message['timestamp'] < earliest['timestamp']:
            first_messages_in_channels[channel_id] = message
        channel_msg_count[channel_id] = channel_msg_count.get(channel_id, 0) + 1

    # insert fake "ThreadCreated" message for each thread that does not have one
    # (snapshot the values so inserting into `messages` cannot disturb iteration
    # even if a caller passes related containers)
    for thread in list(channels.values()):
        if thread['type'] != "GuildPublicThread":
            continue

        thread_id = thread['id']
        if thread_id in announced_thread_ids:
            continue

        first_message_in_thread = first_messages_in_channels.get(thread_id)
        if first_message_in_thread is None:
            # thread exported without any message - nothing to derive the fake message from
            continue

        messages[thread_id] = {
            'id': thread_id,
            'type': "ThreadCreated",
            'timestamp': first_message_in_thread['timestamp'],
            'timestampEdited': None,
            'callEndedTimestamp': None,
            'isPinned': False,
            'content': "Started a thread.",
            'reference': {
                'messageId': None,
                'channelId': thread_id,
                'guildId': self.guild_id,
            },
            # the fake message lives in the parent channel (the forum) of the thread
            'channelId': thread['categoryId'],
            'author': first_message_in_thread['author'],
            'threadName': thread['name'],
            'threadMsgCount': channel_msg_count[thread_id],
            'reactions': [],
            'attachments': [],
            'embeds': [],
            'stickers': [],
            'mentions': [],
        }

    return channels, messages

def cleanup_empty_fields(self, messages):
for message in messages.values():
# cleanup unused fields
Expand Down Expand Up @@ -409,7 +471,7 @@ def group_messages_and_channels(self, messages, channels):
threads = {}
normal_channels = {} # non thread channels
for channel in channels.values():
if channel['type'] == "GuildTextChat" or channel['type'] == "DirectTextChat" or channel['type'] == "DirectGroupTextChat":
if channel['type'] == "GuildTextChat" or channel['type'] == "DirectTextChat" or channel['type'] == "DirectGroupTextChat" or channel['type'] == "GuildVoiceChat":
normal_channels[channel['id']] = channel
elif channel['type'] == "GuildPublicThread":
threads[channel['id']] = channel
Expand All @@ -424,35 +486,29 @@ def group_messages_and_channels(self, messages, channels):
# handle threads without exported channel (FORUMS)
for channel in threads.values():
if channel['categoryId'] not in normal_channels:
print(f"Found thread '{channel['name']}' without exported channel '{channel['category']}'")
print(f" Found thread '{channel['name']}' without exported channel '{channel['category']}'")

# add channel to normal channels
channel_info = {
'id': channel['categoryId'],
'name': channel['category'],
'type': "GuildTextChat",
'messageCount': 0,
'messageCount': len(messages_by_channel[channel['categoryId']].values()),
'categoryId': "-1",
'category': "lost threads",
'category': "forums/lost threads",
'threads': []
}
normal_channels[channel['categoryId']] = channel_info
channels[channel['categoryId']] = channel_info

print("xxxxxxxxx")
pprint(channels)

# messages_by_channel
messages_by_channel[channel['categoryId']] = {}

# # add thread to channel
# add thread to channel
normal_channels[channel['categoryId']]['threads'].append(channel)


for channel in normal_channels.values():
# if channel['type'] == 4:
# continue
print(channel['name'])
# print(channel['name'])
if channel['categoryId'] not in categories:
if 'threads' not in channel:
channel['threads'] = []
Expand All @@ -473,11 +529,6 @@ def group_messages_and_channels(self, messages, channels):
})


print('----------------')




# pprint(threads)
for category in categories.values():
# loop category['channelIds']
Expand Down Expand Up @@ -531,17 +582,21 @@ def get_thread_id_to_message_id(self, messages, messages_by_channel, threads):
return thread_id_to_message_id

def process(self):
print("Step 1 - Reading data from json files...")
print("Step 0 - Reading data from json files...")
channels, messages = self.read_channels_messages_from_files()

print("Step 1 - Recreating forums and missing channels from threads...")
channels, messages = self.simulate_thread_creation(channels, messages)

print("Step 2 - Sorting messages and channels...")
# sort messages dict by key
messages = dict(sorted(messages.items()))
# sort channels dict by key
channels = dict(sorted(channels.items()))

# print message count
print("Message count: " + str(len(messages)))
print(" Message count: " + str(len(messages)))
print(" Channel+Thread count: " + str(len(channels))) # includes forum threads

print("Step 3 - Deduplicating authors...")
messages, authors = self.extract_authors(messages)
Expand Down Expand Up @@ -570,7 +625,7 @@ def process(self):
# get message ids
message_ids = list(messages.keys())

print("Step 9 - Creating lookup thread ids -> to message ids...")
print("Step 10 - Creating lookup thread ids -> to message ids...")
thread_id_to_message_id = self.get_thread_id_to_message_id(messages, messages_by_channel, threads)

# group channels and others attributes to single dict
Expand All @@ -587,7 +642,7 @@ def process(self):
'messages': messages_by_channel,
}

print("Step 10 - Writing guild JSON...")
print("Step 11 - Writing guild JSON...")
self.write_json(guild, output_dir + 'guild.json')


Expand Down

0 comments on commit 019f3dc

Please sign in to comment.