From e405d348ed95ddd762675fc554ba1d1c0fec5697 Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Sat, 18 Nov 2023 02:57:16 +0100 Subject: [PATCH] [backend] Implement basic tokenizer for postgres FTS --- .../server/api/common/generate-fts-query.ts | 88 ++++++++++++++----- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/packages/backend/src/server/api/common/generate-fts-query.ts b/packages/backend/src/server/api/common/generate-fts-query.ts index de86cf9f3..830fb3e9d 100644 --- a/packages/backend/src/server/api/common/generate-fts-query.ts +++ b/packages/backend/src/server/api/common/generate-fts-query.ts @@ -23,61 +23,99 @@ const filters = { "filter": miscFilter, "-filter": miscFilterInverse, "has": attachmentFilter, -} as Record, search: string) => any> +} as Record, search: string, id: number) => any> -//TODO: (phrase OR phrase2) should be treated as an OR part of the query -//TODO: "phrase with multiple words" should be treated as one term //TODO: editing the query should be possible, clicking search again resets it (it should be a twitter-like top of the page kind of deal) +//TODO: new filters are missing from the filter dropdown, and said dropdown should always show (remove the searchFilters meta prop), also we should fix the null bug export function generateFtsQuery(query: SelectQueryBuilder, q: string): void { const components = q.split(" "); const terms: string[] = []; + let finalTerms: string[] = []; + let counter = 0; for (const component of components) { const split = component.split(":"); if (split.length > 1 && filters[split[0]] !== undefined) - filters[split[0]](query, split.slice(1).join(":")); + filters[split[0]](query, split.slice(1).join(":"), counter++); else terms.push(component); } - for (const term of terms) { - if (term.startsWith('-')) query.andWhere("note.text NOT ILIKE :q", { q: `%${sqlLikeEscape(term.substring(1))}%` }); - else query.andWhere("note.text ILIKE :q", { q: `%${sqlLikeEscape(term)}%` }); + let idx = 0; + let state: 'idle' | 'quote' | 'parenthesis' = 'idle'; + for (let i = 0; i < terms.length; i++) { + if (state === 'idle') { + if (terms[i].startsWith('"')) { + idx = i; + state = 'quote'; + } else if (terms[i].startsWith('(')) { + idx = i; + state = 'parenthesis'; + } + else { + finalTerms.push(terms[i]); + } + } + else if (state === 'quote' && terms[i].endsWith('"')) { + finalTerms.push(extractToken(terms, idx, i)); + state = 'idle'; + } else if (state === 'parenthesis' && terms[i].endsWith(')')) { + query.andWhere(new Brackets(qb => { + for (const term of extractToken(terms, idx, i).split(' OR ')) { + const id = counter++; + qb.orWhere(`note.text ILIKE :q_${id}`); + query.setParameter(`q_${id}`, `%${sqlLikeEscape(term)}%`); + } + })); + state = 'idle'; + } + } + + if (state != "idle") { + finalTerms.push(...extractToken(terms, idx, terms.length - 1, false).substring(1).split(' ')); + } + + for (const term of finalTerms) { + const id = counter++; + if (term.startsWith('-')) query.andWhere(`note.text NOT ILIKE :q_${id}`); + else query.andWhere(`note.text ILIKE :q_${id}`); + + query.setParameter(`q_${id}`, `%${sqlLikeEscape(term.substring(term.startsWith('-') ? 1 : 0))}%`); } } -function fromFilter(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function fromFilter(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`note.userId = (${userQuery.getQuery()})`); query.setParameters(userQuery.getParameters()); } -function fromFilterInverse(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function fromFilterInverse(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`note.userId <> (${userQuery.getQuery()})`); query.setParameters(userQuery.getParameters()); } -function mentionFilter(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function mentionFilter(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`note.mentions @> array[(${userQuery.getQuery()})]`); query.setParameters(userQuery.getParameters()); } -function mentionFilterInverse(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function mentionFilterInverse(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`NOT (note.mentions @> array[(${userQuery.getQuery()})])`); query.setParameters(userQuery.getParameters()); } -function replyFilter(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function replyFilter(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`note.replyUserId = (${userQuery.getQuery()})`); query.setParameters(userQuery.getParameters()); } -function replyFilterInverse(query: SelectQueryBuilder, filter: string) { - const userQuery = generateUserSubquery(filter); +function replyFilterInverse(query: SelectQueryBuilder, filter: string, id: number) { + const userQuery = generateUserSubquery(filter, id); query.andWhere(`note.replyUserId <> (${userQuery.getQuery()})`); query.setParameters(userQuery.getParameters()); } @@ -148,10 +186,9 @@ function attachmentFilter(query: SelectQueryBuilder, filter: string) { } } -function generateUserSubquery(filter: string) { +function generateUserSubquery(filter: string, id: number) { if (filter.startsWith('@')) filter = filter.substring(1); const split = filter.split('@'); - const id = Buffer.from(filter).toString('hex'); const query = Users.createQueryBuilder('user') .select('user.id') @@ -165,3 +202,12 @@ function generateUserSubquery(filter: string) { return query; } + +function extractToken(array: string[], start: number, end: number, trim: boolean = true) { + const slice = array.slice(start, end+1).join(" "); + return trim ? trimStartAndEnd(slice) : slice; +} + +function trimStartAndEnd(str: string) { + return str.substring(1, str.length - 1); +}