mirror of
https://iceshrimp.dev/limepotato/jormungandr-bite.git
synced 2025-01-25 06:41:36 -07:00
[backend] Implement basic tokenizer for postgres FTS
This commit is contained in:
parent
a88d581413
commit
e405d348ed
1 changed files with 67 additions and 21 deletions
|
@ -23,61 +23,99 @@ const filters = {
|
||||||
"filter": miscFilter,
|
"filter": miscFilter,
|
||||||
"-filter": miscFilterInverse,
|
"-filter": miscFilterInverse,
|
||||||
"has": attachmentFilter,
|
"has": attachmentFilter,
|
||||||
} as Record<string, (query: SelectQueryBuilder<any>, search: string) => any>
|
} as Record<string, (query: SelectQueryBuilder<any>, search: string, id: number) => any>
|
||||||
|
|
||||||
//TODO: (phrase OR phrase2) should be treated as an OR part of the query
|
|
||||||
//TODO: "phrase with multiple words" should be treated as one term
|
|
||||||
//TODO: editing the query should be possible, clicking search again resets it (it should be a twitter-like top of the page kind of deal)
|
//TODO: editing the query should be possible, clicking search again resets it (it should be a twitter-like top of the page kind of deal)
|
||||||
|
//TODO: new filters are missing from the filter dropdown, and said dropdown should always show (remove the searchFilters meta prop), also we should fix the null bug
|
||||||
|
|
||||||
export function generateFtsQuery(query: SelectQueryBuilder<any>, q: string): void {
|
export function generateFtsQuery(query: SelectQueryBuilder<any>, q: string): void {
|
||||||
const components = q.split(" ");
|
const components = q.split(" ");
|
||||||
const terms: string[] = [];
|
const terms: string[] = [];
|
||||||
|
let finalTerms: string[] = [];
|
||||||
|
let counter = 0;
|
||||||
|
|
||||||
for (const component of components) {
|
for (const component of components) {
|
||||||
const split = component.split(":");
|
const split = component.split(":");
|
||||||
if (split.length > 1 && filters[split[0]] !== undefined)
|
if (split.length > 1 && filters[split[0]] !== undefined)
|
||||||
filters[split[0]](query, split.slice(1).join(":"));
|
filters[split[0]](query, split.slice(1).join(":"), counter++);
|
||||||
else terms.push(component);
|
else terms.push(component);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const term of terms) {
|
let idx = 0;
|
||||||
if (term.startsWith('-')) query.andWhere("note.text NOT ILIKE :q", { q: `%${sqlLikeEscape(term.substring(1))}%` });
|
let state: 'idle' | 'quote' | 'parenthesis' = 'idle';
|
||||||
else query.andWhere("note.text ILIKE :q", { q: `%${sqlLikeEscape(term)}%` });
|
for (let i = 0; i < terms.length; i++) {
|
||||||
|
if (state === 'idle') {
|
||||||
|
if (terms[i].startsWith('"')) {
|
||||||
|
idx = i;
|
||||||
|
state = 'quote';
|
||||||
|
} else if (terms[i].startsWith('(')) {
|
||||||
|
idx = i;
|
||||||
|
state = 'parenthesis';
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
finalTerms.push(terms[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (state === 'quote' && terms[i].endsWith('"')) {
|
||||||
|
finalTerms.push(extractToken(terms, idx, i));
|
||||||
|
state = 'idle';
|
||||||
|
} else if (state === 'parenthesis' && terms[i].endsWith(')')) {
|
||||||
|
query.andWhere(new Brackets(qb => {
|
||||||
|
for (const term of extractToken(terms, idx, i).split(' OR ')) {
|
||||||
|
const id = counter++;
|
||||||
|
qb.orWhere(`note.text ILIKE :q_${id}`);
|
||||||
|
query.setParameter(`q_${id}`, `%${sqlLikeEscape(term)}%`);
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
state = 'idle';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state != "idle") {
|
||||||
|
finalTerms.push(...extractToken(terms, idx, terms.length - 1, false).substring(1).split(' '));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const term of finalTerms) {
|
||||||
|
const id = counter++;
|
||||||
|
if (term.startsWith('-')) query.andWhere(`note.text NOT ILIKE :q_${id}`);
|
||||||
|
else query.andWhere(`note.text ILIKE :q_${id}`);
|
||||||
|
|
||||||
|
query.setParameter(`q_${id}`, `%${sqlLikeEscape(term.substring(term.startsWith('-') ? 1 : 0))}%`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function fromFilter(query: SelectQueryBuilder<any>, filter: string) {
|
function fromFilter(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`note.userId = (${userQuery.getQuery()})`);
|
query.andWhere(`note.userId = (${userQuery.getQuery()})`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
|
||||||
function fromFilterInverse(query: SelectQueryBuilder<any>, filter: string) {
|
function fromFilterInverse(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`note.userId <> (${userQuery.getQuery()})`);
|
query.andWhere(`note.userId <> (${userQuery.getQuery()})`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
|
||||||
function mentionFilter(query: SelectQueryBuilder<any>, filter: string) {
|
function mentionFilter(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`note.mentions @> array[(${userQuery.getQuery()})]`);
|
query.andWhere(`note.mentions @> array[(${userQuery.getQuery()})]`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
|
||||||
function mentionFilterInverse(query: SelectQueryBuilder<any>, filter: string) {
|
function mentionFilterInverse(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`NOT (note.mentions @> array[(${userQuery.getQuery()})])`);
|
query.andWhere(`NOT (note.mentions @> array[(${userQuery.getQuery()})])`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
|
||||||
function replyFilter(query: SelectQueryBuilder<any>, filter: string) {
|
function replyFilter(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`note.replyUserId = (${userQuery.getQuery()})`);
|
query.andWhere(`note.replyUserId = (${userQuery.getQuery()})`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
|
||||||
function replyFilterInverse(query: SelectQueryBuilder<any>, filter: string) {
|
function replyFilterInverse(query: SelectQueryBuilder<any>, filter: string, id: number) {
|
||||||
const userQuery = generateUserSubquery(filter);
|
const userQuery = generateUserSubquery(filter, id);
|
||||||
query.andWhere(`note.replyUserId <> (${userQuery.getQuery()})`);
|
query.andWhere(`note.replyUserId <> (${userQuery.getQuery()})`);
|
||||||
query.setParameters(userQuery.getParameters());
|
query.setParameters(userQuery.getParameters());
|
||||||
}
|
}
|
||||||
|
@ -148,10 +186,9 @@ function attachmentFilter(query: SelectQueryBuilder<any>, filter: string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function generateUserSubquery(filter: string) {
|
function generateUserSubquery(filter: string, id: number) {
|
||||||
if (filter.startsWith('@')) filter = filter.substring(1);
|
if (filter.startsWith('@')) filter = filter.substring(1);
|
||||||
const split = filter.split('@');
|
const split = filter.split('@');
|
||||||
const id = Buffer.from(filter).toString('hex');
|
|
||||||
|
|
||||||
const query = Users.createQueryBuilder('user')
|
const query = Users.createQueryBuilder('user')
|
||||||
.select('user.id')
|
.select('user.id')
|
||||||
|
@ -165,3 +202,12 @@ function generateUserSubquery(filter: string) {
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function extractToken(array: string[], start: number, end: number, trim: boolean = true) {
|
||||||
|
const slice = array.slice(start, end+1).join(" ");
|
||||||
|
return trim ? trimStartAndEnd(slice) : slice;
|
||||||
|
}
|
||||||
|
|
||||||
|
function trimStartAndEnd(str: string) {
|
||||||
|
return str.substring(1, str.length - 1);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue