Wikidata Episode Generator

Creates QuickStatements for Wikidata episode items from Wikipedia episode lists

当前为 2022-03-08 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Wikidata Episode Generator
// @version      0.4.0
// @description  Creates QuickStatements for Wikidata episode items from Wikipedia episode lists
// @author       CennoxX
// @contact      [email protected]
// @namespace    https://greasyfork.org/users/21515
// @homepage     https://github.com/CennoxX/userscripts
// @supportURL   https://github.com/CennoxX/userscripts/issues/new?title=[enwiki%20Episode%20Generator]%20
// @match        https://en.wikipedia.org/wiki/*
// @connect      www.wikidata.org
// @connect      www.imdb.com
// @connect      www.fernsehserien.de
// @icon         https://www.google.com/s2/favicons?sz=64&domain=www.wikidata.org
// @grant        GM.xmlHttpRequest
// @grant        GM.setClipboard
// @grant        GM.registerMenuCommand
// @license      MIT
// ==/UserScript==
/* jshint esversion: 10 */
/* eslint quotes: ["warn", "double", "avoid-escape"]*/

(function() {
    "use strict";
    /**
     * Menu command "convert episode lists for Wikidata".
     *
     * Steps performed by the callback:
     *  1. Load the wikitext of the current article (plus any `{{:Sub page}}`
     *     transclusions) through the local MediaWiki API.
     *  2. Load the Wikidata entity of the page and of the series it belongs to
     *     and read labels, network, language, country, season items and the
     *     IMDb / Fernsehserien.de identifiers (prompting when they are missing).
     *  3. Parse every `{{Episode list ...}}` entry into an episode record.
     *  4. Let GetFSLabels / GetIMDbIds enrich the records.
     *  5. Resolve linked directors, writers and episode articles to Wikidata
     *     item ids and wait for ALL lookups before generating output.
     *  6. Build the QuickStatements text, log it and copy it to the clipboard.
     */
    GM.registerMenuCommand("convert episode lists for Wikidata",
                           (async()=>{
        /** Escapes regex metacharacters so that a page title is matched literally. */
        const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

        /** Loads wikitext and page props of one page via the local MediaWiki API. */
        const fetchPage = async (title) => {
            const res = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(title)}&rvslots=*&rvprop=content&formatversion=2&format=json`);
            const json = await res.json();
            return Object.values(json.query.pages)[0];
        };

        /** Loads one Wikidata entity restricted to the given `props`. */
        const fetchEntity = async (id, props) => {
            const res = await GM.xmlHttpRequest({
                method: "GET",
                url: `https://www.wikidata.org/w/api.php?action=wbgetentities&props=${props}&sitefilter=dewiki&ids=${id}&format=json`,
                onload: function(response) {
                    return response;
                }
            });
            return Object.values((JSON.parse(res.responseText)).entities)[0];
        };

        // One request per distinct title: the same person is usually linked in many episodes.
        const itemRequests = new Map();
        /** Resolves a Wikipedia title to its Wikidata item id, or null when there is none. */
        const lookupItem = (title) => {
            if (!itemRequests.has(title)){
                itemRequests.set(title, (async()=>{
                    try {
                        // Every lookup keeps its own response object; sharing one variable
                        // between concurrent lookups mixes up the answers.
                        const res = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(title)}&format=json`);
                        const json = await res.json();
                        return Object.values(json.query.pages)[0].pageprops?.wikibase_item ?? null;
                    } catch (error) {
                        console.log("ERROR: Wikidata item lookup failed\n", title, error);
                        return null;
                    }
                })());
            }
            return itemRequests.get(title);
        };

        /** Returns capture group 1 of `valuePattern`, or asks the user when the field is missing. */
        const readOrAsk = (text, name, valuePattern) => {
            const found = text.match(valuePattern);
            if (found != null){
                return found[1];
            }
            console.log(`ERROR: ${name}\n`, text);
            return prompt(`${name}\n` + text.match(new RegExp(`${name}.*\n`))) ?? "";
        };

        console.clear();
        // The title suffix uses "-" or "–" depending on the interface language.
        const article = document.title.split(/ [-–] Wikipedia/)[0];
        const page = await fetchPage(article);
        let articletext = page.revisions[0].slots.main.content.split("Specials")[0];
        const subArticles = articletext.match(/{{:(.*)}}/g);
        if (subArticles != null){
            console.log("loading sub articles from Wikipedia…");
            for (const sub of subArticles){
                const subPage = await fetchPage(sub.replace(/{{:|}}/g,""));
                const subtext = subPage.revisions[0].slots.main.content;
                // Function replacer: "$&", "$$" etc. inside the wikitext must stay literal.
                articletext = articletext.replace(sub, () => subtext);
            }
        }

        const wikibaseId = page.pageprops.wikibase_item;
        const pageEntity = await fetchEntity(wikibaseId, "claims");
        let seriesId;
        if (article.split("List of ").length==2){
            seriesId = pageEntity.claims.P360[0].qualifiers.P179[0].datavalue.value.id;
        }else if(article.split("season").length==2){
            seriesId = pageEntity.claims.P179[0].mainsnak.datavalue.value.id;
        }else{
            seriesId = wikibaseId;
        }

        const seriesEntity = await fetchEntity(seriesId, "sitelinks|claims|labels");
        const seriesEn = seriesEntity.labels.en.value;
        // Fall back to the English label, as already done for Dutch.
        const series = seriesEntity.labels.de?.value ?? seriesEn;
        const seriesNl = seriesEntity.labels.nl?.value ?? seriesEn;
        const networkId = seriesEntity.claims.P449[0].mainsnak.datavalue.value.id;
        let imdbId;
        if (seriesEntity.claims.hasOwnProperty("P345")){
            imdbId = seriesEntity.claims.P345[0].mainsnak.datavalue.value;
        }else{
            imdbId = prompt("IMDb-Id");
        }
        let fsId;
        if (seriesEntity.claims.hasOwnProperty("P5327")){
            fsId = seriesEntity.claims.P5327[0].mainsnak.datavalue.value;
        }else{
            fsId = prompt("Fernsehserien.de-Id");
        }
        const originalLanguageId = seriesEntity.claims.P364[0].mainsnak.datavalue.value.id;
        const originalCountryId = seriesEntity.claims.P495[0].mainsnak.datavalue.value.id;
        let seasons = seriesEntity.claims.P527.sort((a,b) => a.qualifiers.P1545[0].datavalue.value - b.qualifiers.P1545[0].datavalue.value).map(i => i.mainsnak.datavalue.value.id);
        if (location.href.includes("season")){
            const epSeason = document.title.match(/season (\d+)\)/)[1];
            seasons = seriesEntity.claims.P527.filter(i => i.qualifiers.P1545[0].datavalue.value==Number(epSeason)).map(i => i.mainsnak.datavalue.value.id);
        }

        const eps = articletext.split(/{{Episode list.*\n/).map(part => part.split(/\n}}\n/)[0]).slice(1);
        // Entries holding two episodes separated by <hr> are split into "part 1" / "part 2".
        for (let doubleEpText of eps.filter(part => part.match(/=.*<hr>.*\n/))){
            const doubleEpIndex = eps.indexOf(doubleEpText);
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[.*\|(.*)\]\]/igm,"$1$2");
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[(.*)\]\]/igm,"$1$2");
            eps.splice(doubleEpIndex, 0, doubleEpText.replace(/<hr>.*/g,"").replace(/(Title *= *.*)/ig,"$1, part 1"));
            eps[doubleEpIndex + 1] = doubleEpText.replace(/=.*<hr>/g,"=").replace(/(Title *= *.*)/ig,"$1, part 2");
        }

        // Link targets seen so far; people are often linked only at their first mention.
        const wikilinks = new Set();
        /** Returns every known link target that occurs in `fieldText`. */
        const linkedNames = (fieldText) => {
            if (fieldText == null || wikilinks.size === 0){
                return [];
            }
            // Titles are escaped: "Name (director)" would otherwise be read as a regex group.
            const pattern = new RegExp([...wikilinks].map(escapeRegExp).join("|"),"g");
            return [...fieldText.matchAll(pattern)].map(found => found[0]);
        };

        const episodes = [];
        for (const epText of eps){
            for (const link of epText.matchAll(/\[\[(.*?)\]\]/g)){
                const target = link[1].split("|")[0];
                if (target !== ""){
                    wikilinks.add(target);
                }
            }
            const airDate = epText.match(/OriginalAirDate *= *(.+) *\n/);
            const directedBy = epText.match(/DirectedBy_?1?2? *= *(.+) *\n/);
            const writtenBy = epText.match(/WrittenBy_?1?2? *= *(.+) *\n/);
            if (airDate == null){console.log("ERROR: OriginalAirDate\n",epText);}
            if (directedBy == null){console.log("ERROR: DirectedBy\n",epText);}
            if (writtenBy == null){console.log("ERROR: WrittenBy\n",epText);}
            episodes.push({
                "NR_GES": readOrAsk(epText, "EpisodeNumber", /EpisodeNumber *= *(\d+) *\n/),
                "NR_ST": readOrAsk(epText, "EpisodeNumber2", /EpisodeNumber2 *= *(\d+) *\n/),
                "OT": readOrAsk(epText, "Title", /Title *= *(.+) *\n/).replace(/<!--.*?-->/i,""),
                "EA": airDate == null ? null : getDate(airDate[1]),
                "REG": linkedNames(directedBy?.[1]),
                "DRB": linkedNames(writtenBy?.[1])
            });
        }

        // A drop in the per-season episode number marks the start of the next season.
        let seasonIndex = 0;
        let previousNumber = 0;
        for (const ep of episodes){
            if (Number(ep.NR_ST) < previousNumber){
                seasonIndex++;
            }
            ep.season = seasonIndex;
            previousNumber = Number(ep.NR_ST);
        }

        if (fsId){
            await GetFSLabels(fsId, episodes);
        }
        if (imdbId){
            await GetIMDbIds(imdbId, episodes);
        }

        console.log("loading Wikipedia article links from Wikidata…");
        // Await all lookups instead of polling counters, so output is never written early.
        await Promise.all(episodes.map(async(ep)=>{
            const linkedTitle = ep.OT.match(/\[\[(.*)\]\]/)?.[1].replace(/\|.*/,"");
            const [writerIds, directorIds, titleId] = await Promise.all([
                Promise.all(ep.DRB.map(name => lookupItem(name))),
                Promise.all(ep.REG.map(name => lookupItem(name))),
                linkedTitle == null ? null : lookupItem(linkedTitle)
            ]);
            ep.DRBid = writerIds.filter(id => id != null);
            ep.REGid = directorIds.filter(id => id != null);
            ep.OTid = titleId ?? "";
        }));

        const wikipediaLink = location.href.split("#")[0];
        const reference = `S143\tQ328\tS4656\t"${wikipediaLink}"`;
        let output = "";
        for (const ep of episodes){
            ep.OT = ep.OT.replace(/\[\[/,"").replace(/\]\]/,"").trim();
            if (ep.OT.includes("|")){
                ep.OT = ep.OT.split(/\|/)[1];
            }
            ep.OT = ep.OT.replace(/^(.*?)(?=...) ?[,:\-–]? \(?(?:part )?(\d+)\)?/i,"$1, part $2");

            // Episodes that already have an item are updated instead of created.
            const isNewItem = ep.OTid === "";
            const subject = isNewItem ? "LAST" : ep.OTid;
            const lines = [];
            if (isNewItem){
                lines.push("CREATE");
            }
            lines.push(`${subject}\tLen\t"${ep.OT}"`);
            if (ep.hasOwnProperty("DT")){
                ep.DT = ep.DT.replace(/^(.*?)(?=...) ?[,:\-–]? \(?(?:Teil )?(\d+)\)?/i,"$1 – Teil $2");
                lines.push(`${subject}\tLde\t"${ep.DT}"`);
            }
            lines.push(`${subject}\tP1476\ten:"${ep.OT}"`);
            if (isNewItem){
                // English and German descriptions are only written for new items.
                lines.push(`${subject}\tDen\t"episode of ${seriesEn}"`);
                lines.push(`${subject}\tDde\t"Folge von ${series}"`);
            }
            lines.push(`${subject}\tDnl\t"aflevering van ${seriesNl}"`);
            lines.push(`${subject}\tP31\tQ21191270\t${reference}`);
            lines.push(`${subject}\tP179\t${seriesId}\tP1545\t"${ep.NR_GES}"\t${reference}`);
            lines.push(`${subject}\tP4908\t${seasons[ep.season]}\tP1545\t"${ep.NR_ST}"\t${reference}`);
            lines.push(`${subject}\tP449\t${networkId}\t${reference}`);
            lines.push(`${subject}\tP364\t${originalLanguageId}\t${reference}`);
            lines.push(`${subject}\tP495\t${originalCountryId}\t${reference}`);
            if (ep.EA != null){
                lines.push(`${subject}\tP577\t+${ep.EA}T00:00:00Z/11\tP291\tQ30\t${reference}`);
            }
            if (ep.hasOwnProperty("imdb")){
                lines.push(`${subject}\tP345\t"${ep.imdb}"`);
            }
            for (const reg of ep.REGid){
                lines.push(`${subject}\tP57\t${reg}\t${reference}`);
            }
            for (const drb of ep.DRBid){
                lines.push(`${subject}\tP58\t${drb}\t${reference}`);
            }
            output += lines.join("\n") + "\n";
        }
        console.log(output);
        GM.setClipboard(output);
    }),"w");
    /**
     * Converts a `{{Start date|Y|M|D}}` template call into an ISO-like
     * `YYYY-MM-DD` string with zero-padded month and day.
     *
     * Extra template parameters after the day (e.g. `|df=y`) are accepted and
     * ignored. Everything following the template on the line is dropped; text
     * that does not contain the template is returned unchanged apart from the
     * zero-padding step.
     *
     * @param {string} episodeDate - raw value of the OriginalAirDate field
     * @returns {string} the normalised date text
     */
    function getDate(episodeDate){
        return episodeDate
            .replace(/{{Start date\|(\d+)\|(\d+)\|(\d+)(?:\|[^{}]*)?}}.*/i,"$1-$2-$3")
            // pad single-digit month/day: "-5" -> "-05"
            .replace(/-(\d)\b/g,"-0$1");
    }
    /**
     * Reduces a title to a comparison key so that titles from different sites
     * can be matched despite differences in case, punctuation and spacing.
     *
     * Steps: trim, lower-case, collapse a trailing "(part N)" / "part N" / "N"
     * to just "N", spell every "&" as "and", then strip a leading "the " / "a ",
     * zero-width characters, spaces and common punctuation.
     *
     * @param {?string} title - title to normalise
     * @returns {?string} the comparison key, or null for a missing/empty title
     */
    function compareString(title){
        if (!title){
            return null;
        }
        return title.trim().toLowerCase()
            .replace(/\(?(?:part)? ?(\d+?)\)?$/i, "$1")
            // global: every ampersand is replaced, not only the first one
            .replace(/&/g, "and")
            .replace(/^the |^a |[\u200B-\u200D\uFEFF]| |\.|'|’|\(|\)|:|,|‚|\?|!|„|“|"|‘|…|\.|—|–|-/gi,"");
    }
    /**
     * Edit distance (insert / delete / substitute, each costing 1) between two
     * strings, compared per UTF-16 code unit.
     *
     * @param {?string} str1 - first string
     * @param {?string} str2 - second string
     * @returns {number} the distance, or the sentinel 100 when either argument
     *     is missing or empty
     */
    function levenshteinDistance(str1, str2){
        if (!str1 || !str2){
            return 100;
        }
        const width = str1.length;
        // Only the previous row of the DP table is needed to compute the next one.
        let previousRow = Array.from({length: width + 1}, (_, col) => col);
        for (let row = 1; row <= str2.length; row++){
            const currentRow = [row];
            for (let col = 1; col <= width; col++){
                const substitutionCost = str1[col - 1] === str2[row - 1] ? 0 : 1;
                currentRow.push(Math.min(
                    currentRow[col - 1] + 1,
                    previousRow[col] + 1,
                    previousRow[col - 1] + substitutionCost
                ));
            }
            previousRow = currentRow;
        }
        return previousRow[width];
    }
    /**
     * Adds an `imdb` property (IMDb title id) to every episode that can be
     * matched against the IMDb episode search results of the series.
     *
     * Matching: a unique exact match of the normalised titles wins. Otherwise
     * the closest title by edit distance is taken if its running number equals
     * the episode's overall number, or if the user confirms the fuzzy match.
     *
     * @param {string} imdbId - IMDb id of the series
     * @param {Array<Object>} episodes - episode records (reads OT, NR_GES,
     *     NR_ST, season; writes imdb)
     * @returns {Promise<void>}
     */
    async function GetIMDbIds(imdbId, episodes){
        console.log("loading IDs from IMDb…");
        const parser = new DOMParser();
        let imdbIds = [];
        let startEp = 1;
        let allEps = 0;
        do{
            const response = await GM.xmlHttpRequest({
                method: "GET",
                url: `https://www.imdb.com/search/title/?series=${imdbId}&view=simple&sort=release_date,asc&count=250&start=${startEp}`,
                onload: function(response) {
                    return response;
                }
            });
            const xmlDoc = parser.parseFromString(response.responseText,"text/html");
            const pageIds = [...xmlDoc.querySelectorAll(".lister-item-header a:nth-child(5)")].map(i => {return {"title": i.innerText, "id": i.href.split("/")[4], "nr": i.parentElement.parentElement.querySelector(".text-primary").innerText.replace(".","")}});
            if (pageIds.length === 0){
                // No progress on this page: stop instead of requesting pages forever.
                break;
            }
            imdbIds = imdbIds.concat(pageIds);
            // Third word of the result summary is read as the total count.
            // NOTE(review): the thousands separator is assumed to be "," — confirm against the live page.
            const totalText = xmlDoc.querySelector(".desc>span")?.innerText.split(" ")[2] ?? "";
            allEps = Number.parseInt(totalText.replace(/,/g,""), 10);
            startEp = startEp + 250;
        } while (imdbIds.length < allEps);

        if (imdbIds.length === 0){
            // reduce() below would throw on an empty list and abort the whole command.
            console.log("ERROR: no episodes found on IMDb");
            return;
        }

        episodes.forEach(ep => {
            let ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                // use the display text of a linked title
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            const exactMatches = imdbIds.filter(id => compareString(id.title) == compareString(ot));
            if (exactMatches.length == 1){
                ep.imdb = exactMatches[0].id;
                return;
            }
            // closest title by edit distance; on ties the later entry wins
            const matchedEp = imdbIds.reduce(function(prev, curr) {
                return levenshteinDistance(compareString(prev.title), compareString(ot)) < levenshteinDistance(compareString(curr.title), compareString(ot)) ? prev : curr;
            });
            let match;
            if (ep.NR_GES != matchedEp.nr){
                let epSeason;
                if (location.href.includes("season")){
                    epSeason = document.title.match(/season (\d+)\)/)[1];
                }else{
                    epSeason = ep.season+1;
                }
                match = confirm(`fuzzy match?
WP: #${ep.NR_GES} / ${epSeason}x${(ep.NR_ST.length==1?"0":"")}${ep.NR_ST} ${ot}
IMDb: #${matchedEp.nr} ${matchedEp.title}`);
            }
            if (ep.NR_GES == matchedEp.nr || match){
                ep.imdb = matchedEp.id;
            }
        });
    }
    /**
     * Adds a `DT` property (German title) to every episode that can be matched
     * against the Fernsehserien.de episode guide of the series.
     *
     * Matching: a unique exact match of the normalised original titles wins.
     * Otherwise the closest title by edit distance is taken if either the
     * overall number or the "SxEE" number agrees, or if the user confirms the
     * fuzzy match. A German title of "–" is treated as "no title".
     *
     * @param {string} fsId - Fernsehserien.de id (URL slug) of the series
     * @param {Array<Object>} episodes - episode records (reads OT, NR_GES,
     *     NR_ST, season; writes DT)
     * @returns {Promise<void>}
     */
    async function GetFSLabels(fsId, episodes){
        console.log("loading German labels from Fernsehserien.de…");
        const response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.fernsehserien.de/${fsId}/episodenguide`,
            onload: function(response) {
                return response;
            }
        });
        const parser = new DOMParser();
        const xmlDoc = parser.parseFromString(response.responseText,"text/html");
        const fsLabels = [...xmlDoc.querySelectorAll("a[data-event-category=liste-episoden]")].map(a => {return{"Lde": a.querySelector("div:nth-child(7)>span").innerText, "Len": a.querySelector("div:nth-child(7)>span.episodenliste-schmal")?.innerText, "nr": a.querySelector("div:nth-child(2)")?.firstChild?.nodeValue, "epNr": a.querySelector("span:nth-child(1)").innerText.replace(".","x")}});
        if (fsLabels.length === 0){
            // reduce() below would throw on an empty list and abort the whole command.
            console.log("ERROR: no episodes found on Fernsehserien.de");
            return;
        }
        episodes.forEach(ep => {
            let ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                // use the display text of a linked title
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            const exactMatches = fsLabels.filter(id => compareString(id.Len) == compareString(ot));
            if (exactMatches.length == 1){
                if (exactMatches[0].Lde != "–"){
                    ep.DT = exactMatches[0].Lde;
                }
                return;
            }
            // closest title by edit distance; on ties the later entry wins
            const matchedEp = fsLabels.reduce(function(prev, curr) {
                return levenshteinDistance(compareString(prev.Len), compareString(ot)) < levenshteinDistance(compareString(curr.Len), compareString(ot)) ? prev : curr;
            });
            if (matchedEp.Lde == "–"){
                return;
            }
            let epSeason;
            if (location.href.includes("season")){
                epSeason = document.title.match(/season (\d+)\)/)[1];
            }else{
                epSeason = ep.season+1;
            }
            const epNr = epSeason + "x" + (ep.NR_ST.length==1?"0":"") + ep.NR_ST;
            if (ep.NR_GES != matchedEp.nr && epNr != matchedEp.epNr){
                // neither numbering agrees: let the user decide
                let message = `WP: #${ep.NR_GES} / ${epNr} ${ot}
FS: #${matchedEp.nr} / ${matchedEp.epNr} ${matchedEp.Len}`;
                if (confirm("fuzzy match?\n"+message)){
                    ep.DT = matchedEp.Lde;
                    message = "matched:\n" + message;
                }else{
                    message = "not matched:\n" + message;
                }
                console.log(message);
            }else{
                ep.DT = matchedEp.Lde;
            }
        });
    }
})();