Wikidata Episode Generator

Creates QuickStatements for Wikidata episode items from Wikipedia episode lists

目前为 2022-03-09 提交的版本。查看 最新版本

// ==UserScript==
// @name         Wikidata Episode Generator
// @version      0.4.1
// @description  Creates QuickStatements for Wikidata episode items from Wikipedia episode lists
// @author       CennoxX
// @contact      [email protected]
// @namespace    https://gf.qytechs.cn/users/21515
// @homepage     https://github.com/CennoxX/userscripts
// @supportURL   https://github.com/CennoxX/userscripts/issues/new?title=[enwiki%20Episode%20Generator]%20
// @match        https://en.wikipedia.org/wiki/*
// @connect      www.wikidata.org
// @connect      www.imdb.com
// @connect      www.fernsehserien.de
// @icon         https://www.google.com/s2/favicons?sz=64&domain=www.wikidata.org
// @grant        GM.xmlHttpRequest
// @grant        GM.setClipboard
// @grant        GM.registerMenuCommand
// @license      MIT
// ==/UserScript==
/* jshint esversion: 10 */
/* eslint quotes: ["warn", "double", "avoid-escape"]*/

(function() {
    "use strict";
    GM.registerMenuCommand("convert episode lists for Wikidata",
                           (async()=>{
        console.clear();
        var article = document.title.split(" – Wikipedia")[0];
        var response = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(article)}&rvslots=*&rvprop=content&formatversion=2&format=json`);
        var data = await response.json();
        var articletext = Object.values(data.query.pages)[0].revisions[0].slots.main.content.split("Specials")[0];
        var subArticles = articletext.match(/{{:(.*)}}/g);
        if (subArticles != null){
            console.log("loading sub articles from Wikipedia…");

            for (var sub of subArticles){
                response = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(sub.replace(/{{:|}}/g,""))}&rvslots=*&rvprop=content&formatversion=2&format=json`);
                var subData = await response.json();
                var subtext = Object.values(subData.query.pages)[0].revisions[0].slots.main.content;
                articletext = articletext.replace(sub, subtext);
            }
        }

        var wikibaseId = Object.values(data.query.pages)[0].pageprops.wikibase_item;
        response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.wikidata.org/w/api.php?action=wbgetentities&props=claims&sitefilter=dewiki&ids=${wikibaseId}&format=json`,
            onload: function(response) {
                return response;
            }
        });
        var jsonObj = Object.values((JSON.parse(response.responseText)).entities)[0];
        var seriesId = 0;
        if (article.split("List of ").length==2){
            seriesId = jsonObj.claims.P360[0].qualifiers.P179[0].datavalue.value.id;
        }else if(article.split("season").length==2){
            seriesId = jsonObj.claims.P179[0].mainsnak.datavalue.value.id;
        }else{
            seriesId = wikibaseId;
        }
        response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.wikidata.org/w/api.php?action=wbgetentities&props=sitelinks|claims|labels&sitefilter=dewiki&ids=${seriesId}&format=json`,
            onload: function(response) {
                return response;
            }
        });
        jsonObj = Object.values((JSON.parse(response.responseText)).entities)[0];
        var series = jsonObj.labels.de.value;
        var seriesEn = jsonObj.labels.en.value;
        var seriesNl = jsonObj.labels.nl?.value ?? jsonObj.labels.en.value;
        var networkId = jsonObj.claims.P449[0].mainsnak.datavalue.value.id;
        var imdbId;
        if (jsonObj.claims.hasOwnProperty("P345")){
            imdbId = jsonObj.claims.P345[0].mainsnak.datavalue.value;
        }else{
            imdbId = prompt("IMDb-Id");
        }
        var fsId;
        if (jsonObj.claims.hasOwnProperty("P5327")){
            fsId = jsonObj.claims.P5327[0].mainsnak.datavalue.value;
        }else{
            fsId = prompt("Fernsehserien.de-Id");
        }
        var originalLanguageId = jsonObj.claims.P364[0].mainsnak.datavalue.value.id;
        var originalCountryId = jsonObj.claims.P495[0].mainsnak.datavalue.value.id;
        var seasons = jsonObj.claims.P527.sort((a,b) => a.qualifiers.P1545[0].datavalue.value - b.qualifiers.P1545[0].datavalue.value).map(i => i.mainsnak.datavalue.value.id);
        if (location.href.includes("season")){
            var epSeason = document.title.match(/season (\d+)\)/)[1];
            seasons = jsonObj.claims.P527.filter(i => i.qualifiers.P1545[0].datavalue.value==Number(epSeason)).map(i => i.mainsnak.datavalue.value.id);
        }
        var wikilinks = [];
        var eps = articletext.split(/{{Episode list.*\n/).map(i => i.split(/\n}}\n/)[0]).slice(1);
        for (var doubleEpText of eps.filter(i => i.match(/=.*<hr>.*\n/))){
            var doubleEpIndex = eps.indexOf(doubleEpText);
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[.*\|(.*)\]\]/igm,"$1$2");
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[(.*)\]\]/igm,"$1$2");
            eps.splice(doubleEpIndex, 0, doubleEpText.replace(/<hr>.*/g,"").replace(/(Title *= *.*)/ig,"$1, part 1"));
            eps[++doubleEpIndex]=doubleEpText.replace(/=.*<hr>/g,"=").replace(/(Title *= *.*)/ig,"$1, part 2");
        }
        var episodes = eps.map(i => {
            wikilinks = wikilinks.concat([...i.matchAll(/\[\[(.*?)\]\]/g)].map(i => i[1].split("|")[0]));
            if (i.match("OriginalAirDate *= *(\.+) *\n")==null){console.log("ERROR: OriginalAirDate\n",i);}
            if (i.match("DirectedBy *= *(\.+) *\n")==null){console.log("ERROR: DirectedBy\n",i);}
            if (i.match("WrittenBy *= *(\.+) *\n")==null){console.log("ERROR: WrittenBy\n",i);}
            return {
                "NR_GES": (i.match("EpisodeNumber *= *(\\d+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("EpisodeNumber\n"+i.match("EpisodeNumber.*\n")))])[1],
                "NR_ST": (i.match("EpisodeNumber2 *= *(\\d+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("EpisodeNumber2\n"+i.match("EpisodeNumber2.*\n")))])[1],
                "OT": (i.match("Title *= *(\.+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("OT\n"+i.match("OT.*\n")))])[1].replace(/<!--.*?-->/i,""),
                "EA": getDate(i.match("OriginalAirDate *= *(\.+) *\n")[1]),
                "REG": [...i.match("DirectedBy_?1?2? *= *(\.+) *\n")[1].matchAll(new RegExp(wikilinks.join("|"),"g"))].map(i => i[0]).filter(i => i != ""),
                "DRB": [...i.match("WrittenBy_?1?2? *= *(\.+) *\n")[1].matchAll(new RegExp(wikilinks.join("|"),"g"))].map(i => i[0]).filter(i => i != "")
            };
        });
        var seasonId = 0;
        var episodeId = 0;
        var wikipediaLink = location.href.split("#")[0];
        var output = "";

        episodes.forEach(i => {
            if (Number(i.NR_ST)<episodeId){
                seasonId++;
            }
            i.season=seasonId;
            episodeId=i.NR_ST;
        });
        if (fsId){
            await GetFSLabels(fsId, episodes);
        }
        if (imdbId){
            await GetIMDbIds(imdbId, episodes);
        }
        console.log("loading Wikipedia article links from Wikidata…");
        episodes.forEach(ep=>{
            ep.DRBid = [];
            ep.REGid = [];
            ep.OTid = "";
            ep.DRB.forEach(async(drb)=>{
                response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(drb)}&format=json`);
                data = await response.json();
                if (Object.values(data.query.pages)[0].pageprops != null){
                    ep.DRBid.push(Object.values(data.query.pages)[0].pageprops.wikibase_item);
                }
            });
            ep.REG.forEach(async(reg)=>{
                response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(reg)}&format=json`);
                data = await response.json();
                if (Object.values(data.query.pages)[0].pageprops != null){
                    ep.REGid.push(Object.values(data.query.pages)[0].pageprops.wikibase_item);
                }
            });
            if (ep.OT.match(/\[\[.*\]\]/)!=null){
                var ot = ep.OT;
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/\|.*/,"");
                (async()=>{
                    response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(ot)}&format=json`);
                    data = await response.json();
                    if (Object.values(data.query.pages)[0].pageprops != null){
                        ep.OTid=Object.values(data.query.pages)[0].pageprops.wikibase_item;
                    }
                })();
            };
        });
        var stopInterval = setInterval(()=>{
            var results = 0;
            var drbnr = 0;
            var regnr = 0;
            var otnr = 0;
            episodes.forEach(i=> {
                results+=i.DRBid.length+i.REGid.length+(i.OTid!="")?1:0;
            });
            var requests = 0;
            episodes.forEach(i=>{
                requests+=i.DRB.length+i.REG.length+(i.OT.match(/\[\[.*\]\]/)!=null)?1:0;
            });
            if (requests == 0 || results == requests){
                clearInterval(stopInterval);
                //write CREATE-Statements, get DRB and REG
                episodes.forEach(ep => {
                    ep.OT = ep.OT.replace(/\[\[/,"").replace(/\]\]/,"").trim();
                    if (ep.OT.includes("|")){
                        ep.OT = ep.OT.split(/\|/)[1];
                    }
                    ep.OT = ep.OT.replace(/^(.*?)(?=[^\d]{2}.) ?[,:\-–]? \(?(?:part )?(\d+)\)?/i,"$1, part $2");
                    var epText = `CREATE
LAST	Len	"${ep.OT}"
`
                    if (ep.hasOwnProperty("DT")){
                        ep.DT = ep.DT.replace(/^(.*?)(?=[^\d]{2}.) ?[,:\-–]? \(?(?:Teil )?(\d+)\)?/i,"$1 – Teil $2");
                        epText += `LAST	Lde	"${ep.DT}"
`;
                    }
                    epText +=`LAST	P1476	en:"${ep.OT}"
LAST	Den	"episode of ${seriesEn}"
LAST	Dde	"Folge von ${series}"
LAST	Dnl	"aflevering van ${seriesNl}"
LAST	P31	Q21191270	S143	Q328	S4656	"${wikipediaLink}"
LAST	P179	${seriesId}	P1545	"${ep.NR_GES}"	S143	Q328	S4656	"${wikipediaLink}"
LAST	P4908	${seasons[ep.season]}	P1545	"${ep.NR_ST}"	S143	Q328	S4656	"${wikipediaLink}"
LAST	P449	${networkId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P364	${originalLanguageId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P495	${originalCountryId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P577	+${ep.EA}T00:00:00Z/11	P291	Q30	S143	Q328	S4656	"${wikipediaLink}"
`;
                    if (ep.hasOwnProperty("imdb")){
                        epText += `LAST	P345	"${ep.imdb}"
`;
                    }
                    ep.REGid.forEach(reg=> {
                        epText += `LAST	P57	${reg}	S143	Q328	S4656	"${wikipediaLink}"
`;});
                    ep.DRBid.forEach(drb=> {
                        epText += `LAST	P58	${drb}	S143	Q328	S4656	"${wikipediaLink}"
`;});
                    if (ep.OTid!=""){
                        epText = epText.replace(/LAST\sDen.*\nLAST\sDde.*\n/,"");
                        epText = epText.replace(/(CREATE\n)?LAST/g,ep.OTid);
                    }
                    output += epText;
                });
                console.log(output);
                GM.setClipboard(output);
            }
        },1000);
    }),"w");
    function getDate(episodeDate){
        return episodeDate.replace(/{{Start date\|(\d+)\|(\d+)\|(\d+)}}.*/i,"$1-$2-$3").replace(/-(\d)\b/g,"-0$1");
    }
    function compareString(title){
        if (!title){
            return null;
        }
        return title.trim().toLowerCase().replace(/\(?(?:part)? ?(\d+?)\)?$/i, "$1").replace(/&/i, "and").replace(/^the |^a |[\u200B-\u200D\uFEFF]| |\.|'|’|\(|\)|:|,|‚|\?|!|„|“|"|‘|…|\.|—|–|-/gi,"");
    }
    function levenshteinDistance(str1, str2){
        if (!str1 || !str2){
            return 100;
        }
        var track = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
        for (let i = 0; i <= str1.length; i += 1){
            track[0][i] = i;
        }
        for (let j = 0; j <= str2.length; j += 1){
            track[j][0] = j;
        }
        for (let j = 1; j <= str2.length; j += 1){
            for (let i = 1; i <= str1.length; i += 1){
                var indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                track[j][i] = Math.min(
                    track[j][i - 1] + 1,
                    track[j - 1][i] + 1,
                    track[j - 1][i - 1] + indicator,
                );
            }
        }
        return track[str2.length][str1.length];
    }
    async function GetIMDbIds(imdbId, episodes){
        console.log("loading IDs from IMDb…");
        var imdbIds = [];
        var startEp = 1;
        var allEps = 0;
        do{
            var response = await GM.xmlHttpRequest({
                method: "GET",
                url: `https://www.imdb.com/search/title/?series=${imdbId}&view=simple&sort=release_date,asc&count=250&start=${startEp}`,
                onload: function(response) {
                    return response;
                }
            });
            var parser = new DOMParser();
            var xmlDoc = parser.parseFromString(response.responseText,"text/html");
            imdbIds = imdbIds.concat([...xmlDoc.querySelectorAll(".lister-item-header a:nth-child(5)")].map(i => {return {"title": i.innerText, "id": i.href.split("/")[4], "nr": i.parentElement.parentElement.querySelector(".text-primary").innerText.replace(".","")}}));
            allEps = xmlDoc.querySelector(".desc>span").innerText.split(" ")[2];
            startEp = startEp + 250;
        } while (imdbIds.length < allEps);

        episodes.forEach(ep => {
            var ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            var imdbId = imdbIds.filter(id => compareString(id.title) == compareString(ot));
            if (imdbId.length == 1){
                ep.imdb = imdbId[0].id;
            }else{
                var matchedEp = imdbIds.reduce(function(prev, curr) {
                    return levenshteinDistance(compareString(prev.title), compareString(ot)) < levenshteinDistance(compareString(curr.title), compareString(ot)) ? prev : curr;
                });
                var match;
                if (ep.NR_GES != matchedEp.nr){
                    var epSeason;
                    if (location.href.includes("season")){
                        epSeason = document.title.match(/season (\d+)\)/)[1];
                    }else{
                        epSeason = ep.season+1
                    }
                    match = confirm(`fuzzy match?
WP: #${ep.NR_GES} / ${epSeason}x${(ep.NR_ST.length==1?"0":"")}${ep.NR_ST} ${ot}
IMDb: #${matchedEp.nr} ${matchedEp.title}`);
                }
                if (ep.NR_GES == matchedEp.nr || match){
                    ep.imdb = matchedEp.id;
                }
            }
        });
    };
    async function GetFSLabels(fsId, episodes){
        console.log("loading German labels from Fernsehserien.de…");
        var fsLabels = [];
        var response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.fernsehserien.de/${fsId}/episodenguide`,
            onload: function(response) {
                return response;
            }
        });
        var parser = new DOMParser();
        var xmlDoc = parser.parseFromString(response.responseText,"text/html");
        fsLabels = [...xmlDoc.querySelectorAll("a[data-event-category=liste-episoden]")].map(a => {return{"Lde": a.querySelector("div:nth-child(7)>span").innerText, "Len": a.querySelector("div:nth-child(7)>span.episodenliste-schmal")?.innerText, "nr": a.querySelector("div:nth-child(2)")?.firstChild?.nodeValue, "epNr": a.querySelector("span:nth-child(1)").innerText.replace(".","x")}});
        episodes.forEach(ep => {
            var ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            var fsLabel = fsLabels.filter(id => compareString(id.Len) == compareString(ot));
            if (fsLabel.length == 1){
                if (fsLabel[0].Lde != "–"){
                    ep.DT = fsLabel[0].Lde;
                }
            }else{
                var matchedEp = fsLabels.reduce(function(prev, curr) {
                    return levenshteinDistance(compareString(prev.Len), compareString(ot)) < levenshteinDistance(compareString(curr.Len), compareString(ot)) ? prev : curr;
                });
                var epSeason;
                if (location.href.includes("season")){
                    epSeason = document.title.match(/season (\d+)\)/)[1];
                }else{
                    epSeason = ep.season+1
                }
                var epNr = epSeason + "x" + (ep.NR_ST.length==1?"0":"") + ep.NR_ST;
                if (matchedEp.Lde != "–"){
                    if (ep.NR_GES != matchedEp.nr && epNr != matchedEp.epNr){
                        var message = `WP: #${ep.NR_GES} / ${epNr} ${ot}
FS: #${matchedEp.nr} / ${matchedEp.epNr} ${matchedEp.Len}`
                        if (confirm("fuzzy match?\n"+message)){
                            ep.DT = matchedEp.Lde;
                            message = "matched:\n" + message;
                        }else{
                            message = "not matched:\n" + message;
                        }
                        console.log(message);
                    }else{
                        ep.DT = matchedEp.Lde;
                    }
                }
            }
        });
    };
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址