Wikidata Episode Generator

Creates QuickStatements for Wikidata episode items from Wikipedia episode lists

Fra og med 08.03.2022. Se den nyeste version.

You will need to install an extension such as Tampermonkey, Greasemonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install an extension such as Tampermonkey or Violentmonkey to install this script.

You will need to install an extension such as Tampermonkey or Userscripts to install this script.

You will need to install an extension such as Tampermonkey to install this script.

You will need to install a user script manager extension to install this script.

(I already have a user script manager, let me install it!)

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install an extension such as Stylus to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

You will need to install a user style manager extension to install this style.

(I already have a user style manager, let me install it!)

// ==UserScript==
// @name         Wikidata Episode Generator
// @version      0.4.0
// @description  Creates QuickStatements for Wikidata episode items from Wikipedia episode lists
// @author       CennoxX
// @contact      [email protected]
// @namespace    https://greasyfork.org/users/21515
// @homepage     https://github.com/CennoxX/userscripts
// @supportURL   https://github.com/CennoxX/userscripts/issues/new?title=[enwiki%20Episode%20Generator]%20
// @match        https://en.wikipedia.org/wiki/*
// @connect      www.wikidata.org
// @connect      www.imdb.com
// @connect      www.fernsehserien.de
// @icon         https://www.google.com/s2/favicons?sz=64&domain=www.wikidata.org
// @grant        GM.xmlHttpRequest
// @grant        GM.setClipboard
// @grant        GM.registerMenuCommand
// @license      MIT
// ==/UserScript==
/* jshint esversion: 10 */
/* eslint quotes: ["warn", "double", "avoid-escape"]*/

(function() {
    "use strict";
    GM.registerMenuCommand("convert episode lists for Wikidata",
                           (async()=>{
        console.clear();
        var article = document.title.split(" – Wikipedia")[0];
        var response = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(article)}&rvslots=*&rvprop=content&formatversion=2&format=json`);
        var data = await response.json();
        var articletext = Object.values(data.query.pages)[0].revisions[0].slots.main.content.split("Specials")[0];
        var subArticles = articletext.match(/{{:(.*)}}/g);
        if (subArticles != null){
            console.log("loading sub articles from Wikipedia…");

            for (var sub of subArticles){
                response = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(sub.replace(/{{:|}}/g,""))}&rvslots=*&rvprop=content&formatversion=2&format=json`);
                var subData = await response.json();
                var subtext = Object.values(subData.query.pages)[0].revisions[0].slots.main.content;
                articletext = articletext.replace(sub, subtext);
            }
        }

        var wikibaseId = Object.values(data.query.pages)[0].pageprops.wikibase_item;
        response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.wikidata.org/w/api.php?action=wbgetentities&props=claims&sitefilter=dewiki&ids=${wikibaseId}&format=json`,
            onload: function(response) {
                return response;
            }
        });
        var jsonObj = Object.values((JSON.parse(response.responseText)).entities)[0];
        var seriesId = 0;
        if (article.split("List of ").length==2){
            seriesId = jsonObj.claims.P360[0].qualifiers.P179[0].datavalue.value.id;
        }else if(article.split("season").length==2){
            seriesId = jsonObj.claims.P179[0].mainsnak.datavalue.value.id;
        }else{
            seriesId = wikibaseId;
        }
        response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.wikidata.org/w/api.php?action=wbgetentities&props=sitelinks|claims|labels&sitefilter=dewiki&ids=${seriesId}&format=json`,
            onload: function(response) {
                return response;
            }
        });
        jsonObj = Object.values((JSON.parse(response.responseText)).entities)[0];
        var series = jsonObj.labels.de.value;
        var seriesEn = jsonObj.labels.en.value;
        var seriesNl = jsonObj.labels.nl?.value ?? jsonObj.labels.en.value;
        var networkId = jsonObj.claims.P449[0].mainsnak.datavalue.value.id;
        var imdbId;
        if (jsonObj.claims.hasOwnProperty("P345")){
            imdbId = jsonObj.claims.P345[0].mainsnak.datavalue.value;
        }else{
            imdbId = prompt("IMDb-Id");
        }
        var fsId;
        if (jsonObj.claims.hasOwnProperty("P5327")){
            fsId = jsonObj.claims.P5327[0].mainsnak.datavalue.value;
        }else{
            fsId = prompt("Fernsehserien.de-Id");
        }
        var originalLanguageId = jsonObj.claims.P364[0].mainsnak.datavalue.value.id;
        var originalCountryId = jsonObj.claims.P495[0].mainsnak.datavalue.value.id;
        var seasons = jsonObj.claims.P527.sort((a,b) => a.qualifiers.P1545[0].datavalue.value - b.qualifiers.P1545[0].datavalue.value).map(i => i.mainsnak.datavalue.value.id);
        if (location.href.includes("season")){
            var epSeason = document.title.match(/season (\d+)\)/)[1];
            seasons = jsonObj.claims.P527.filter(i => i.qualifiers.P1545[0].datavalue.value==Number(epSeason)).map(i => i.mainsnak.datavalue.value.id);
        }
        var wikilinks = [];
        var eps = articletext.split(/{{Episode list.*\n/).map(i => i.split(/\n}}\n/)[0]).slice(1);
        for (var doubleEpText of eps.filter(i => i.match(/=.*<hr>.*\n/))){
            var doubleEpIndex = eps.indexOf(doubleEpText);
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[.*\|(.*)\]\]/igm,"$1$2");
            doubleEpText = doubleEpText.replace(/(Title *= )\[\[(.*)\]\]/igm,"$1$2");
            eps.splice(doubleEpIndex, 0, doubleEpText.replace(/<hr>.*/g,"").replace(/(Title *= *.*)/ig,"$1, part 1"));
            eps[++doubleEpIndex]=doubleEpText.replace(/=.*<hr>/g,"=").replace(/(Title *= *.*)/ig,"$1, part 2");
        }
        var episodes = eps.map(i => {
            wikilinks = wikilinks.concat([...i.matchAll(/\[\[(.*?)\]\]/g)].map(i => i[1].split("|")[0]));
            if (i.match("OriginalAirDate *= *(\.+) *\n")==null){console.log("ERROR: OriginalAirDate\n",i);}
            if (i.match("DirectedBy *= *(\.+) *\n")==null){console.log("ERROR: DirectedBy\n",i);}
            if (i.match("WrittenBy *= *(\.+) *\n")==null){console.log("ERROR: WrittenBy\n",i);}
            return {
                "NR_GES": (i.match("EpisodeNumber *= *(\\d+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("EpisodeNumber\n"+i.match("EpisodeNumber.*\n")))])[1],
                "NR_ST": (i.match("EpisodeNumber2 *= *(\\d+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("EpisodeNumber2\n"+i.match("EpisodeNumber2.*\n")))])[1],
                "OT": (i.match("Title *= *(\.+) *\n")??["",(console.log("ERROR: EpisodeNumber\n",i),prompt("OT\n"+i.match("OT.*\n")))])[1].replace(/<!--.*?-->/i,""),
                "EA": getDate(i.match("OriginalAirDate *= *(\.+) *\n")[1]),
                "REG": [...i.match("DirectedBy_?1?2? *= *(\.+) *\n")[1].matchAll(new RegExp(wikilinks.join("|"),"g"))].map(i => i[0]).filter(i => i != ""),
                "DRB": [...i.match("WrittenBy_?1?2? *= *(\.+) *\n")[1].matchAll(new RegExp(wikilinks.join("|"),"g"))].map(i => i[0]).filter(i => i != "")
            };
        });
        var seasonId = 0;
        var episodeId = 0;
        var wikipediaLink = location.href.split("#")[0];
        var output = "";

        episodes.forEach(i => {
            if (Number(i.NR_ST)<episodeId){
                seasonId++;
            }
            i.season=seasonId;
            episodeId=i.NR_ST;
        });
        if (fsId){
            await GetFSLabels(fsId, episodes);
        }
        if (imdbId){
            await GetIMDbIds(imdbId, episodes);
        }
        console.log("loading Wikipedia article links from Wikidata…");
        episodes.forEach(ep=>{
            ep.DRBid = [];
            ep.REGid = [];
            ep.OTid = "";
            ep.DRB.forEach(async(drb)=>{
                response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(drb)}&format=json`);
                data = await response.json();
                if (Object.values(data.query.pages)[0].pageprops != null){
                    ep.DRBid.push(Object.values(data.query.pages)[0].pageprops.wikibase_item);
                }
            });
            ep.REG.forEach(async(reg)=>{
                response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(reg)}&format=json`);
                data = await response.json();
                if (Object.values(data.query.pages)[0].pageprops != null){
                    ep.REGid.push(Object.values(data.query.pages)[0].pageprops.wikibase_item);
                }
            });
            if (ep.OT.match(/\[\[.*\]\]/)!=null){
                var ot = ep.OT;
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/\|.*/,"");
                (async()=>{
                    response = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(ot)}&format=json`);
                    data = await response.json();
                    if (Object.values(data.query.pages)[0].pageprops != null){
                        ep.OTid=Object.values(data.query.pages)[0].pageprops.wikibase_item;
                    }
                })();
            };
        });
        var stopInterval = setInterval(()=>{
            var results = 0;
            var drbnr = 0;
            var regnr = 0;
            var otnr = 0;
            episodes.forEach(i=> {
                results+=i.DRBid.length+i.REGid.length+(i.OTid!="")?1:0;
            });
            var requests = 0;
            episodes.forEach(i=>{
                requests+=i.DRB.length+i.REG.length+(i.OT.match(/\[\[.*\]\]/)!=null)?1:0;
            });
            if (requests == 0 || results == requests){
                clearInterval(stopInterval);
                //write CREATE-Statements, get DRB and REG
                episodes.forEach(ep => {
                    ep.OT = ep.OT.replace(/\[\[/,"").replace(/\]\]/,"").trim();
                    if (ep.OT.includes("|")){
                        ep.OT = ep.OT.split(/\|/)[1];
                    }
                    ep.OT = ep.OT.replace(/^(.*?)(?=...) ?[,:\-–]? \(?(?:part )?(\d+)\)?/i,"$1, part $2");
                    var epText = `CREATE
LAST	Len	"${ep.OT}"
`
                    if (ep.hasOwnProperty("DT")){
                        ep.DT = ep.DT.replace(/^(.*?)(?=...) ?[,:\-–]? \(?(?:Teil )?(\d+)\)?/i,"$1 – Teil $2");
                        epText += `LAST	Lde	"${ep.DT}"
`;
                    }
                    epText +=`LAST	P1476	en:"${ep.OT}"
LAST	Den	"episode of ${seriesEn}"
LAST	Dde	"Folge von ${series}"
LAST	Dnl	"aflevering van ${seriesNl}"
LAST	P31	Q21191270	S143	Q328	S4656	"${wikipediaLink}"
LAST	P179	${seriesId}	P1545	"${ep.NR_GES}"	S143	Q328	S4656	"${wikipediaLink}"
LAST	P4908	${seasons[ep.season]}	P1545	"${ep.NR_ST}"	S143	Q328	S4656	"${wikipediaLink}"
LAST	P449	${networkId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P364	${originalLanguageId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P495	${originalCountryId}	S143	Q328	S4656	"${wikipediaLink}"
LAST	P577	+${ep.EA}T00:00:00Z/11	P291	Q30	S143	Q328	S4656	"${wikipediaLink}"
`;
                    if (ep.hasOwnProperty("imdb")){
                        epText += `LAST	P345	"${ep.imdb}"
`;
                    }
                    ep.REGid.forEach(reg=> {
                        epText += `LAST	P57	${reg}	S143	Q328	S4656	"${wikipediaLink}"
`;});
                    ep.DRBid.forEach(drb=> {
                        epText += `LAST	P58	${drb}	S143	Q328	S4656	"${wikipediaLink}"
`;});
                    if (ep.OTid!=""){
                        epText = epText.replace(/LAST\sDen.*\nLAST\sDde.*\n/,"");
                        epText = epText.replace(/(CREATE\n)?LAST/g,ep.OTid);
                    }
                    output += epText;
                });
                console.log(output);
                GM.setClipboard(output);
            }
        },1000);
    }),"w");
    function getDate(episodeDate){
        return episodeDate.replace(/{{Start date\|(\d+)\|(\d+)\|(\d+)}}.*/i,"$1-$2-$3").replace(/-(\d)\b/g,"-0$1");
    }
    function compareString(title){
        if (!title){
            return null;
        }
        return title.trim().toLowerCase().replace(/\(?(?:part)? ?(\d+?)\)?$/i, "$1").replace(/&/i, "and").replace(/^the |^a |[\u200B-\u200D\uFEFF]| |\.|'|’|\(|\)|:|,|‚|\?|!|„|“|"|‘|…|\.|—|–|-/gi,"");
    }
    function levenshteinDistance(str1, str2){
        if (!str1 || !str2){
            return 100;
        }
        var track = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
        for (let i = 0; i <= str1.length; i += 1){
            track[0][i] = i;
        }
        for (let j = 0; j <= str2.length; j += 1){
            track[j][0] = j;
        }
        for (let j = 1; j <= str2.length; j += 1){
            for (let i = 1; i <= str1.length; i += 1){
                var indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                track[j][i] = Math.min(
                    track[j][i - 1] + 1,
                    track[j - 1][i] + 1,
                    track[j - 1][i - 1] + indicator,
                );
            }
        }
        return track[str2.length][str1.length];
    }
    async function GetIMDbIds(imdbId, episodes){
        console.log("loading IDs from IMDb…");
        var imdbIds = [];
        var startEp = 1;
        var allEps = 0;
        do{
            var response = await GM.xmlHttpRequest({
                method: "GET",
                url: `https://www.imdb.com/search/title/?series=${imdbId}&view=simple&sort=release_date,asc&count=250&start=${startEp}`,
                onload: function(response) {
                    return response;
                }
            });
            var parser = new DOMParser();
            var xmlDoc = parser.parseFromString(response.responseText,"text/html");
            imdbIds = imdbIds.concat([...xmlDoc.querySelectorAll(".lister-item-header a:nth-child(5)")].map(i => {return {"title": i.innerText, "id": i.href.split("/")[4], "nr": i.parentElement.parentElement.querySelector(".text-primary").innerText.replace(".","")}}));
            allEps = xmlDoc.querySelector(".desc>span").innerText.split(" ")[2];
            startEp = startEp + 250;
        } while (imdbIds.length < allEps);

        episodes.forEach(ep => {
            var ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            var imdbId = imdbIds.filter(id => compareString(id.title) == compareString(ot));
            if (imdbId.length == 1){
                ep.imdb = imdbId[0].id;
            }else{
                var matchedEp = imdbIds.reduce(function(prev, curr) {
                    return levenshteinDistance(compareString(prev.title), compareString(ot)) < levenshteinDistance(compareString(curr.title), compareString(ot)) ? prev : curr;
                });
                var match;
                if (ep.NR_GES != matchedEp.nr){
                    var epSeason;
                    if (location.href.includes("season")){
                        epSeason = document.title.match(/season (\d+)\)/)[1];
                    }else{
                        epSeason = ep.season+1
                    }
                    match = confirm(`fuzzy match?
WP: #${ep.NR_GES} / ${epSeason}x${(ep.NR_ST.length==1?"0":"")}${ep.NR_ST} ${ot}
IMDb: #${matchedEp.nr} ${matchedEp.title}`);
                }
                if (ep.NR_GES == matchedEp.nr || match){
                    ep.imdb = matchedEp.id;
                }
            }
        });
    };
    async function GetFSLabels(fsId, episodes){
        console.log("loading German labels from Fernsehserien.de…");
        var fsLabels = [];
        var response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.fernsehserien.de/${fsId}/episodenguide`,
            onload: function(response) {
                return response;
            }
        });
        var parser = new DOMParser();
        var xmlDoc = parser.parseFromString(response.responseText,"text/html");
        fsLabels = [...xmlDoc.querySelectorAll("a[data-event-category=liste-episoden]")].map(a => {return{"Lde": a.querySelector("div:nth-child(7)>span").innerText, "Len": a.querySelector("div:nth-child(7)>span.episodenliste-schmal")?.innerText, "nr": a.querySelector("div:nth-child(2)")?.firstChild?.nodeValue, "epNr": a.querySelector("span:nth-child(1)").innerText.replace(".","x")}});
        episodes.forEach(ep => {
            var ot = ep.OT;
            if (ot.match(/\[\[.*\]\]/)!=null){
                ot = ot.match(/\[\[(.*)\]\]/)[1];
                ot = ot.replace(/.*\|/,"");
            }
            var fsLabel = fsLabels.filter(id => compareString(id.Len) == compareString(ot));
            if (fsLabel.length == 1){
                if (fsLabel[0].Lde != "–"){
                    ep.DT = fsLabel[0].Lde;
                }
            }else{
                var matchedEp = fsLabels.reduce(function(prev, curr) {
                    return levenshteinDistance(compareString(prev.Len), compareString(ot)) < levenshteinDistance(compareString(curr.Len), compareString(ot)) ? prev : curr;
                });
                var epSeason;
                if (location.href.includes("season")){
                    epSeason = document.title.match(/season (\d+)\)/)[1];
                }else{
                    epSeason = ep.season+1
                }
                var epNr = epSeason + "x" + (ep.NR_ST.length==1?"0":"") + ep.NR_ST;
                if (matchedEp.Lde != "–"){
                    if (ep.NR_GES != matchedEp.nr && epNr != matchedEp.epNr){
                        var message = `WP: #${ep.NR_GES} / ${epNr} ${ot}
FS: #${matchedEp.nr} / ${matchedEp.epNr} ${matchedEp.Len}`
                        if (confirm("fuzzy match?\n"+message)){
                            ep.DT = matchedEp.Lde;
                            message = "matched:\n" + message;
                        }else{
                            message = "not matched:\n" + message;
                        }
                        console.log(message);
                    }else{
                        ep.DT = matchedEp.Lde;
                    }
                }
            }
        });
    };
})();