/**
* 重叠匹配度
* @author: zhuangjie
* @date: 2024-07-23
*/
function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
// scope与 objArr 同步排序
sortAndSync(scopeForData, objArr)
if(Array.isArray(scopeForObjArrContainer)) {
// 说明需要分数,倒给
scopeForObjArrContainer.push(...scopeForData)
}
return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
}
// 根据scopeForData得到新数组objArr
function filterMismatchItem(scopeForData,objArr) {
const result = []
for(let [scope,index] of scopeForData.entries()) {
if(scope != 0) {
result.push(objArr[index])
}
}
return result
}
/**
* 计算匹配度外层封装工具
* @param {string} keyword - 匹配字符串1
* @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
* @returns {number} 匹配度分数
*/
function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
// 兼容topicWeighs为topic数组,元素越靠前权重越高
if (Array.isArray(topicWeighs)) {
const _temp = {}
if (sort === "asc") {
for (let i = 1; i <= topicWeighs.length; i++) {
_temp[topicWeighs[i - 1]] = i;
}
} else {
// desc
for (let i = topicWeighs.length; i > 0; i--) {
_temp[topicWeighs[topicWeighs.length - i]] = i;
}
}
topicWeighs = _temp;
}
let topicList = Object.keys(topicWeighs)
// topic map 得分
const topicScores = topicList.map(topic => {
let topicScore = 0; // 得分
const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
for (let overlapLengthKey of overlapLengthKeys) {
const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
}
return topicScore;
})
// 算总分返回
return topicScores.reduce((a, b) => a + b, 0)
}
/**
* 查找重叠匹配块(入口函数)
* @param {*} str1
* @param {*} str2
* @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
* 算法核心思想:
* -----------------------------------------------------
* sumatrapdf* | sumatrapdf* | sumatrapdf*
* pdf- | pdf- | pdf-
* ------------------------------------------------------
*/
function findOverlapBlocks(str1 = "", str2 = "") {
let maxStr2OffsetLength = str1.length - 1;
let minStr2OffsetLength = -(str2.length - 1);
const alignmentHub = { /* "3": ["好用","推荐"] */ }
for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
let str1EndIndex = str1.length - 1;
let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);
let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]
// 对齐
const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
// 长重叠 覆盖 短重叠
longOverlappingCoverage(alignmentHub, alignmentContent);
}
return alignmentHub;
}
function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
const modifiedCols = Object.keys(alignmentContent)
const maxmodifiedCols = Math.max(...modifiedCols)
const minmodifiedCols = Math.min(...modifiedCols)
// 合并到对应的列上
modifiedCols.forEach(col => {
alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
})
const alignmentHubCol = Object.keys(alignmentHub)
const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
// 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
// - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
// - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);
}
// 列元素过滤
function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
if (cols.length == 0 || gtCols.length == 0) return;
gtCols.forEach(gtCol => {
const gtOverlappingBlocks = alignmentHub[gtCol];
for (let [index, col] of cols.entries()) {
const overlappingBlocks = alignmentHub[col];
if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
for (let gtOverlappingBlock of gtOverlappingBlocks) {
if (gtOverlappingBlock.includes(overlappingBlock)) {
return false
}
}
return true;
})
}
})
// 清理掉value为空数组项
// {1: [],2:["好用"]} -to-> {2:["好用"]}
Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
}
// 对齐
function alignment(str1 = "", str2 = "") {
// 返回 {"1",["我","用"],"2":["推荐","可以"]}
if (str1.length != str2.length) {
throw new Error("算法错误,对齐字符串长度不一致");
}
const overlappingBlocks = {}
let currentOverlappingBlock = "";
for (let i = str1.length - 1; i >= 0; i--) {
if (str1[i] == str2[i]) {
currentOverlappingBlock = str1[i] + currentOverlappingBlock;
if (i - 1 >= 0) continue;
}
if (currentOverlappingBlock.length == 0) {
continue;
} else {
// 块收集
let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
if (currentOverlappingBlockContainer == undefined) {
currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
} else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
currentOverlappingBlockContainer.push(currentOverlappingBlock)
}
}
currentOverlappingBlock = "";
}
return overlappingBlocks;
}
// 【同步算法-堆实现】
function sortAndSync(arr1, arr2, order = 'desc') {
if (arr1.length !== arr2.length) {
throw new Error("Both arrays must have the same length");
}
function swap(arr, i, j) {
[arr[i], arr[j]] = [arr[j], arr[i]];
}
function heapify(arr1, arr2, n, i, order) {
let largest = i;
let left = 2 * i + 1;
let right = 2 * i + 2;
if (order === 'asc') {
if (left < n && arr1[left] > arr1[largest]) {
largest = left;
}
if (right < n && arr1[right] > arr1[largest]) {
largest = right;
}
} else {
if (left < n && arr1[left] < arr1[largest]) {
largest = left;
}
if (right < n && arr1[right] < arr1[largest]) {
largest = right;
}
}
if (largest !== i) {
swap(arr1, i, largest);
swap(arr2, i, largest);
heapify(arr1, arr2, n, largest, order);
}
}
function buildHeap(arr1, arr2, n, order) {
for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
heapify(arr1, arr2, n, i, order);
}
}
function heapSort(arr1, arr2, order) {
let n = arr1.length;
buildHeap(arr1, arr2, n, order);
for (let i = n - 1; i > 0; i--) {
swap(arr1, 0, i);
swap(arr2, 0, i);
heapify(arr1, arr2, i, 0, order);
}
}
heapSort(arr1, arr2, order);
}
// 【算法测试1】
// console.log("-- 算法测试开始 --")
// console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
// console.log("-- 算法测试结束 --")
// 【算法测试2】
// console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));