mirror of
https://iceshrimp.dev/limepotato/jormungandr-bite.git
synced 2025-01-10 23:51:01 -07:00
94 lines
1.8 KiB
TypeScript
94 lines
1.8 KiB
TypeScript
const MeCab = require('mecab-async');
|
|
|
|
import Post from '../../api/models/post';
|
|
import User from '../../api/models/user';
|
|
import config from '../../conf';
|
|
|
|
const mecab = new MeCab();
|
|
if (config.analysis.mecab_command) mecab.command = config.analysis.mecab_command;
|
|
|
|
function tokenize(text: string) {
|
|
const tokens = mecab.parseSync(text)
|
|
// キーワードのみ
|
|
.filter(token => token[1] == '名詞' && (token[2] == '固有名詞' || token[2] == '一般'))
|
|
// 取り出し
|
|
.map(token => token[0]);
|
|
|
|
return tokens;
|
|
}
|
|
|
|
// Fetch all users
|
|
User.find({}, {
|
|
fields: {
|
|
_id: true
|
|
}
|
|
}).then(users => {
|
|
let i = -1;
|
|
|
|
const x = cb => {
|
|
if (++i == users.length) return cb();
|
|
extractKeywordsOne(users[i]._id, () => x(cb));
|
|
};
|
|
|
|
x(() => {
|
|
console.log('complete');
|
|
});
|
|
});
|
|
|
|
async function extractKeywordsOne(id, cb) {
|
|
console.log(`extracting keywords of ${id} ...`);
|
|
|
|
// Fetch recent posts
|
|
const recentPosts = await Post.find({
|
|
user_id: id,
|
|
text: {
|
|
$exists: true
|
|
}
|
|
}, {
|
|
sort: {
|
|
_id: -1
|
|
},
|
|
limit: 1000,
|
|
fields: {
|
|
_id: false,
|
|
text: true
|
|
}
|
|
});
|
|
|
|
// 投稿が少なかったら中断
|
|
if (recentPosts.length < 10) {
|
|
return cb();
|
|
}
|
|
|
|
const keywords = {};
|
|
|
|
// Extract keywords from recent posts
|
|
recentPosts.forEach(post => {
|
|
const keywordsOfPost = tokenize(post.text);
|
|
|
|
keywordsOfPost.forEach(keyword => {
|
|
if (keywords[keyword]) {
|
|
keywords[keyword]++;
|
|
} else {
|
|
keywords[keyword] = 1;
|
|
}
|
|
});
|
|
});
|
|
|
|
// Sort keywords by frequency
|
|
const keywordsSorted = Object.keys(keywords).sort((a, b) => keywords[b] - keywords[a]);
|
|
|
|
// Lookup top 10 keywords
|
|
const topKeywords = keywordsSorted.slice(0, 10);
|
|
|
|
process.stdout.write(' >>> ' + topKeywords.join(' '));
|
|
|
|
// Save
|
|
User.update({ _id: id }, {
|
|
$set: {
|
|
keywords: topKeywords
|
|
}
|
|
}).then(() => {
|
|
cb();
|
|
});
|
|
}
|