# Mirror of https://git.mia.jetzt/scrubber (synced 2025-01-10 11:21:53 -07:00)
# 159 lines, 3.9 KiB, Python
import json
|
|
import sys
|
|
import time
|
|
from collections import namedtuple
|
|
from functools import cache
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import psycopg
|
|
|
|
from com import eval_config, progressbar
|
|
|
|
|
|
# Row of the note table reduced to its three link columns (see get_note).
Note = namedtuple("Note", "renote_id reply_id user_id")
|
|
# Graph node: a note id plus the lists of child nodes that reply to / renote it.
Tree = namedtuple("Tree", "id replies renotes")
|
|
|
|
# Runtime settings: a callable that opens the database connection, the id of
# the user whose notes are collected, and an optional cap on collected ids.
config = eval_config()
conn: psycopg.Connection = config["connect"]()
user_id: str = config["user_id"]
early_exit: Optional[int] = config.get("early_exit")

print("fetching note ids", file=sys.stderr)

# Ids of the user's notes, skipping rows that carry a renoteId but no text.
note_ids = set()
cur = conn.execute(
    'select id from note where "userId" = %s and not ("renoteId" is not null and text is null)',
    [user_id],
)
while True:
    batch = cur.fetchmany(0xFF)
    if not batch:
        break
    # Tiny pause between batches; presumably to go easy on the database.
    time.sleep(0.0001)
    note_ids.update(record[0] for record in batch)
    # The cap is checked once per batch, so the set may overshoot it slightly.
    if early_exit and len(note_ids) > early_exit:
        break
|
|
|
|
|
|
@cache
def get_note(id: str) -> Note:
    """Fetch the renote, reply and author ids of one note, memoised per id.

    Args:
        id: Value matched against the ``id`` column of the ``note`` table.

    Returns:
        A ``Note`` built from the row's "renoteId", "replyId" and "userId"
        columns, in that order.

    Raises:
        LookupError: If the query matches no row.
    """
    # Brief pause before every uncached lookup; presumably to go easy on the
    # database.
    time.sleep(0.0001)
    row = conn.execute(
        'select "renoteId", "replyId", "userId" from note where id = %s', [id]
    ).fetchone()
    if row is None:
        # fetchone() yields None when nothing matched; unpacking that would
        # surface as an opaque TypeError, so report the missing id instead.
        raise LookupError(f"note {id!r} not found")
    return Note(*row)
|
|
|
|
|
|
# roots: note id -> Tree for notes that neither reply to nor renote anything.
roots = {}
# trees: note id -> Tree for every node created so far.
trees = {}


def tree_init(id: str, seek: bool = True) -> Tree:
    """Return the Tree node for a note, creating and registering it if needed.

    A new node is attached to the node of the note it replies to and/or the
    node of the note it renotes; those nodes are created recursively first.
    A note with neither link is recorded in ``roots``. Every new node is
    recorded in ``trees`` once its links have been resolved.

    Args:
        id: Id of the note to look up or create a node for.
        seek: Accepted for callers but not read anywhere in this function.

    Returns:
        The existing or newly created ``Tree`` for ``id``.
    """
    known = trees.get(id)
    if known:
        return known

    tree = Tree(id, [], [])
    note = get_note(id)
    parent_id, renoted_id = note.reply_id, note.renote_id

    if not (parent_id or renoted_id):
        roots[id] = tree
    else:
        if parent_id:
            tree_init(parent_id).replies.append(tree)
        if renoted_id:
            tree_init(renoted_id, False).renotes.append(tree)

    trees[id] = tree
    return tree
|
|
|
|
|
|
def make_widgets(msg, trees, roots):
    """Assemble the widget list for a progress bar.

    Args:
        msg: Label placed at the start of the bar.
        trees: When truthy, include a ``trees`` variable widget.
        roots: When truthy, include a ``roots`` variable widget.

    Returns:
        A list of widgets: label, percentage, bar, value/max counter, the
        requested variable widgets, and finally an ETA.
    """
    widgets = [
        f"{msg} ",
        progressbar.Percentage(),
        " ",
        progressbar.Bar(),
        " ",
        progressbar.SimpleProgress("%(value_s)s/%(max_value_s)s"),
        " ",
    ]
    # Optional counters keep the same order as the parameters: trees, then roots.
    for wanted, variable_name in ((trees, "trees"), (roots, "roots")):
        if wanted:
            widgets.extend((progressbar.Variable(variable_name), " "))
    widgets.append(progressbar.ETA())
    return widgets
|
|
|
|
|
|
# Create a node for every collected note id (tree_init also pulls in the notes
# they reply to or renote), showing the running tree and root counts.
note_total = len(note_ids)
pb = progressbar.ProgressBar(
    0, note_total, widgets=make_widgets("building trees", True, True)
)
for note_id in note_ids:
    tree_init(note_id)
    pb.increment(trees=len(trees), roots=len(roots))
pb.finish()
|
|
|
|
|
|
def traverse(tree: Tree):
    """Walk down the reply links until a note by the configured user is found.

    A node whose note belongs to ``user_id`` is handed to ``expand`` and not
    descended into further here; for any other node the walk continues into
    each of its replies.

    Args:
        tree: Node to start from.
    """
    if get_note(tree.id).user_id == user_id:
        expand(tree)
        return
    for reply in tree.replies:
        traverse(reply)
|
|
|
|
|
|
def expand(tree: Tree):
    """Attach not-yet-known replies and renotes to a node, then recurse.

    Ids returned by the ``note_replies`` database function for this node are
    turned into new nodes unless they are already in ``trees``. A new node is
    added to ``tree.replies`` when its note replies to this one and to
    ``tree.renotes`` when its note renotes this one; either case registers it
    in ``trees``. Afterwards every reply of this node is expanded in turn.

    Args:
        tree: Node whose children should be discovered.
    """
    # Brief pause before each query; presumably to go easy on the database.
    time.sleep(0.0001)
    found = conn.execute(
        "select id from note_replies(%s, 1, 1000)", [tree.id]
    ).fetchall()

    for row in found:
        child_id = row[0]
        if child_id in trees:
            continue
        note = get_note(child_id)
        child = Tree(child_id, [], [])
        # Reply link first, renote link second; a note matching both lands in
        # both lists.
        links = (
            (note.reply_id, tree.replies),
            (note.renote_id, tree.renotes),
        )
        for target_id, bucket in links:
            if target_id == tree.id:
                bucket.append(child)
                trees[child_id] = child

    for reply in tree.replies:
        expand(reply)
|
|
|
|
|
|
# Walk every root thread, expanding below the configured user's notes, and
# show the growing tree count.
roots_len = len(roots)
expand_widgets = make_widgets("expanding roots", True, False)
pb = progressbar.ProgressBar(0, roots_len, widgets=expand_widgets)

for root in roots.values():
    traverse(root)
    pb.increment(trees=len(trees))
pb.finish()
|
|
|
|
|
|
# Dump the graph to graph.db, one node per line with four tab-separated
# fields: node id, comma-joined reply ids, comma-joined renote ids, and
# comma-joined flags ("root" for ids present in roots, "self" for notes
# authored by the configured user).
with Path("graph.db").open("w") as f:
    pb = progressbar.ProgressBar(
        0, len(trees), widgets=make_widgets("saving graph", False, False)
    )
    for tree in trees.values():
        note = get_note(tree.id)

        flags = []
        if tree.id in roots:
            flags.append("root")
        if note.user_id == user_id:
            flags.append("self")

        fields = (
            tree.id,
            ",".join(reply.id for reply in tree.replies),
            ",".join(renote.id for renote in tree.renotes),
            ",".join(flags),
        )
        # One write per record instead of a separate write for every field.
        f.write("\t".join(fields) + "\n")
        pb.increment()
    pb.finish()
|