collocates分析

类似coca和antconc的搭配词分析功能,由于没有找到nltk ngram window_size的JavaScript版替代,所以自己简单写了一个。

目前為 2024-07-04 提交的版本,檢視 最新版本

// ==UserScript==
// @name         collocates分析
// @namespace    http://tampermonkey.net/
// @version      2024-07-04
// @description  类似coca和antconc的搭配词分析功能,由于没有找到nltk ngram window_size的JavaScript版替代,所以自己简单写了一个。
// @author       You
// @include      *://*
// @icon         https://www.google.com/s2/favicons?domain=english-corpora.org
// @grant        GM_notification
// @license MIT
// ==/UserScript==

// Word whose neighbouring words are counted; reassigned at runtime by the Alt+O shortcut handler.
let targetWord = "the";

/**
 * Count the words that co-occur with a target word inside a fixed window.
 *
 * The text is stripped of common punctuation, split on whitespace and
 * lower-cased. For every occurrence of the target, each token located up to
 * `windowSize` positions before or after it (the occurrence itself excluded)
 * has its counter incremented. Other occurrences of the target that fall
 * inside the window are counted like any other word.
 *
 * @param {string} text - Text to analyse; non-string input yields an empty result.
 * @param {string} [target=targetWord] - Word to look for. Defaults to the
 *   script-level `targetWord`. It is normalised the same way as the text.
 * @param {number} [windowSize=5] - Non-negative number of tokens inspected on each side.
 * @returns {Object<string, number>} Prototype-less object mapping each
 *   lower-cased collocate to its count (safe to pass to `Object.entries`).
 */
function countWordsNearAbout(text, target = targetWord, windowSize = 5) {
	// No prototype: words such as "constructor" must not collide with inherited keys.
	const wordFrequency = Object.create(null);
	const punctuation = /[.,!?;:'"()]/g;

	// Normalise the target exactly like the tokens, otherwise "The " or "the," never matches.
	const needle = String(target).replace(punctuation, "").trim().toLowerCase();
	if (typeof text !== "string" || needle === "") {
		return wordFrequency;
	}

	const words = text
		.replace(punctuation, "")
		.split(/\s+/)
		.filter((token) => token !== "") // leading/trailing whitespace produces empty tokens
		.map((token) => token.toLowerCase());

	for (let i = 0; i < words.length; i++) {
		if (words[i] !== needle) {
			continue;
		}
		// Window bounds clamped to the token array; `end` is exclusive.
		const start = Math.max(0, i - windowSize);
		const end = Math.min(words.length, i + windowSize + 1);

		for (let j = start; j < end; j++) {
			if (j === i) {
				continue; // skip the matched occurrence itself
			}
			const word = words[j];
			wordFrequency[word] = (wordFrequency[word] || 0) + 1;
		}
	}

	return wordFrequency;
}

/**
 * Log the collocates whose count is at least 5, most frequent first.
 *
 * Each surviving entry is printed to the console as `word: count`, followed
 * by a closing status line. Nothing is returned.
 *
 * @param {Object<string, number>} wordFrequency - Map from word to occurrence count.
 */
function sortAndFilterWordFrequency(wordFrequency) {
	const frequent = [];

	// Keep only entries that reach the frequency threshold.
	for (const entry of Object.entries(wordFrequency)) {
		if (entry[1] >= 5) {
			frequent.push(entry);
		}
	}

	// Highest count first.
	frequent.sort((left, right) => right[1] - left[1]);

	for (const [word, count] of frequent) {
		console.log(`${word}: ${count}`);
	}
	console.log('输出完毕,仅输出频率大于5的');
}



// Sample text for trying out the analysis by hand; not referenced by the code visible in this file.
const text = "This is an aaa aaa example sentence about something aaa. Let's talk about what we know about JavaScript. How about we learn more about it?  for Garmann's Summer, written and illustrated by Stian Hole and translated from Norwegian by Don Bartlett; Amulet Books This is an aaa aaa example sentence about something aaa. Let's talk about what we know about JavaScript. How about we learn more about it?  for Garmann's Summer, written and illustrated by Stian Hole and translated from Norwegian by Don Bartlett; Amulet Books This is an aaa aaa example sentence about something aaa. Let's talk about what we know about JavaScript. How about we learn more about it?  for Garmann's Summer, written and illustrated by Stian Hole and translated from Norwegian by Don Bartlett; Amulet Books This is an aaa aaa example sentence about something aaa. Let's talk about what we know about JavaScript. How about we learn more about it?  for Garmann's Summer, written and illustrated by Stian Hole and translated from Norwegian by Don Bartlett; Amulet Books";




/**
 * Keydown handler implementing the script's two Alt shortcuts.
 *
 * - Alt+P (keyCode 80): run the collocate count on the current selection and
 *   log the filtered, sorted result.
 * - Alt+O (keyCode 79): use the current selection as the new target word and
 *   show a notification with it.
 *
 * NOTE: `KeyboardEvent.keyCode` is deprecated; it is kept here so the
 * shortcuts keep responding to the same keys as before.
 *
 * @param {KeyboardEvent} e - The keydown event.
 */
let keyProcess = (e) => {
	if (!e.altKey) {
		return;
	}

	if (e.keyCode === 80) { // Alt+P
		const selectText = window.getSelection().toString();
		const wordFrequency = countWordsNearAbout(selectText);
		sortAndFilterWordFrequency(wordFrequency);
	}

	if (e.keyCode === 79) { // Alt+O
		// Tokens are lower-cased before comparison, so the stored target must be
		// lower-cased and trimmed too, or it can never match.
		const selected = window.getSelection().toString().trim().toLowerCase();
		if (selected === "") {
			return; // nothing selected: keep the previous target
		}
		targetWord = selected;
		GM_notification({
			text: '查找的词:' + targetWord,
			timeout: 2000 // notification display time in milliseconds
		});
	}
};

// Register the shortcut handler for keydown events on the document.
document.addEventListener('keydown', keyProcess)

// // 特殊处理coca iframe
// setInterval(() => {
// 	document.getElementsByName("x3")[0].contentWindow.document.addEventListener('keydown', keyProcess)
// }, 3000);

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址