string-overlap-matching-degree

计算字符串重叠/匹配度计算

目前為 2024-08-06 提交的版本,檢視 最新版本

此腳本不應該直接安裝,它是一個供其他腳本使用的函式庫。欲使用本函式庫,請在腳本 metadata 寫上: // @require https://update.gf.qytechs.cn/scripts/501646/1422792/string-overlap-matching-degree.js

/**
 * 重叠匹配度
 * @author: zhuangjie
 * @date: 2024-07-23
 */
function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
    const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
    // scope与 objArr 同步排序
    sortAndSync(scopeForData, objArr)
    if(Array.isArray(scopeForObjArrContainer)) {
        // 说明需要分数,倒给
        scopeForObjArrContainer.push(...scopeForData)
    }
    return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
}

// 根据scopeForData得到新数组objArr
function filterMismatchItem(scopeForData,objArr) {
    const result = []
    for(let [scope,index] of scopeForData.entries()) {
        if(scope != 0) {
            result.push(objArr[index])
        }
    }
    return result
}

/**
 * 计算匹配度外层封装工具
 * @param {string} keyword - 匹配字符串1
 * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
 * @returns {number} 匹配度分数
 */
function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
    // 兼容topicWeighs为topic数组,元素越靠前权重越高
    if (Array.isArray(topicWeighs)) {
        const _temp = {}
        if (sort === "asc") {
            for (let i = 1; i <= topicWeighs.length; i++) {
                _temp[topicWeighs[i - 1]] = i;
            }
        } else {
            // desc
            for (let i = topicWeighs.length; i > 0; i--) {
                _temp[topicWeighs[topicWeighs.length - i]] = i;
            }
        }
        topicWeighs = _temp;
    }
    let topicList = Object.keys(topicWeighs)
    // topic map 得分
    const topicScores = topicList.map(topic => {
        let topicScore = 0; // 得分
        const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
        const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
        const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
        for (let overlapLengthKey of overlapLengthKeys) {
            const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
            topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
        }
        return topicScore;
    })
    // 算总分返回
    return topicScores.reduce((a, b) => a + b, 0)
}

/**
 * 查找重叠匹配块(入口函数)
 * @param {*} str1 
 * @param {*} str2 
 * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
 * 算法核心思想:
 * -----------------------------------------------------
 * sumatrapdf*          | sumatrapdf*      | sumatrapdf*
 *           pdf-       |          pdf-    |         pdf-
 * ------------------------------------------------------
 */
function findOverlapBlocks(str1 = "", str2 = "") {
    let maxStr2OffsetLength = str1.length - 1;
    let minStr2OffsetLength = -(str2.length - 1);
    const alignmentHub = { /* "3": ["好用","推荐"] */ }
    for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
        let str1EndIndex = str1.length - 1;
        let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);

        let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
        let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]

        // 对齐
        const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
        // 长重叠 覆盖 短重叠
        longOverlappingCoverage(alignmentHub, alignmentContent);
    }
    return alignmentHub;
}
function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
    const modifiedCols = Object.keys(alignmentContent)
    const maxmodifiedCols = Math.max(...modifiedCols)
    const minmodifiedCols = Math.min(...modifiedCols)
    // 合并到对应的列上
    modifiedCols.forEach(col => {
        alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
    })
    const alignmentHubCol = Object.keys(alignmentHub)
    const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
    const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
    // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
    // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
    colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
    // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
    colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);

}
// 列元素过滤
function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
    if (cols.length == 0 || gtCols.length == 0) return;
    gtCols.forEach(gtCol => {
        const gtOverlappingBlocks = alignmentHub[gtCol];
        for (let [index, col] of cols.entries()) {
            const overlappingBlocks = alignmentHub[col];
            if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
            alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
                for (let gtOverlappingBlock of gtOverlappingBlocks) {
                    if (gtOverlappingBlock.includes(overlappingBlock)) {
                        return false
                    }
                }
                return true;
            })
        }
    })
    // 清理掉value为空数组项
    // {1: [],2:["好用"]} -to-> {2:["好用"]}
    Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
}
// 对齐
function alignment(str1 = "", str2 = "") {
    // 返回 {"1",["我","用"],"2":["推荐","可以"]}
    if (str1.length != str2.length) {
        throw new Error("算法错误,对齐字符串长度不一致");
    }
    const overlappingBlocks = {}
    let currentOverlappingBlock = "";
    for (let i = str1.length - 1; i >= 0; i--) {
        if (str1[i] == str2[i]) {
            currentOverlappingBlock = str1[i] + currentOverlappingBlock;
            if (i - 1 >= 0) continue;
        }
        if (currentOverlappingBlock.length == 0) {
            continue;
        } else {
            // 块收集
            let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
            if (currentOverlappingBlockContainer == undefined) {
                currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
            } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
                currentOverlappingBlockContainer.push(currentOverlappingBlock)
            }
        }
        currentOverlappingBlock = "";
    }
    return overlappingBlocks;
}

// 【同步算法-堆实现】
function sortAndSync(arr1, arr2, order = 'desc') {
    if (arr1.length !== arr2.length) {
        throw new Error("Both arrays must have the same length");
    }
    function swap(arr, i, j) {
        [arr[i], arr[j]] = [arr[j], arr[i]];
    }
    function heapify(arr1, arr2, n, i, order) {
        let largest = i;
        let left = 2 * i + 1;
        let right = 2 * i + 2;

        if (order === 'asc') {
            if (left < n && arr1[left] > arr1[largest]) {
                largest = left;
            }
            if (right < n && arr1[right] > arr1[largest]) {
                largest = right;
            }
        } else {

            if (left < n && arr1[left] < arr1[largest]) {
                largest = left;
            }
            if (right < n && arr1[right] < arr1[largest]) {
                largest = right;
            }
        }

        if (largest !== i) {
            swap(arr1, i, largest);
            swap(arr2, i, largest);
            heapify(arr1, arr2, n, largest, order);
        }
    }
    function buildHeap(arr1, arr2, n, order) {
        for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
            heapify(arr1, arr2, n, i, order);
        }
    }
    function heapSort(arr1, arr2, order) {
        let n = arr1.length;
        buildHeap(arr1, arr2, n, order);

        for (let i = n - 1; i > 0; i--) {
            swap(arr1, 0, i);
            swap(arr2, 0, i);
            heapify(arr1, arr2, i, 0, order);
        }
    }
    heapSort(arr1, arr2, order);
}

// 【算法测试1】
//  console.log("-- 算法测试开始 --")
//  console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
//  console.log("-- 算法测试结束 --")

// 【算法测试2】
// console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址