// ==UserScript==
// @name Wikidata Episode Generator
// @version 0.3.0
// @description Creates QuickStatements for Wikidata episode items from Wikipedia episode lists
// @author CennoxX
// @contact [email protected]
// @namespace https://gf.qytechs.cn/users/21515
// @homepage https://github.com/CennoxX/userscripts
// @supportURL https://github.com/CennoxX/userscripts/issues/new?title=[enwiki%20Episode%20Generator]%20
// @match https://en.wikipedia.org/wiki/*
// @connect www.wikidata.org
// @connect www.imdb.com
// @connect www.fernsehserien.de
// @icon https://www.google.com/s2/favicons?sz=64&domain=www.wikidata.org
// @grant GM.xmlHttpRequest
// @grant GM.setClipboard
// @grant GM.registerMenuCommand
// @license MIT
// ==/UserScript==
/* jshint esversion: 10 */
/* eslint quotes: ["warn", "double", "avoid-escape"]*/
(function() {
"use strict";
// Menu command "convert episode lists for Wikidata" (access key "w").
//
// Steps performed by the callback:
//   1. Load the wikitext and Wikidata item of the current article.
//   2. Determine the series item and read its labels, network, language, country,
//      seasons and external ids (IMDb P345, Fernsehserien.de P5327).
//   3. Parse every {{Episode list}} entry of the wikitext.
//   4. Resolve linked directors, writers and episode titles to Wikidata items.
//   5. Let GetFSLabels / GetIMDbIds add German labels (ep.DT) and IMDb ids (ep.imdb).
//   6. Build the QuickStatements batch, log it and copy it to the clipboard.
GM.registerMenuCommand("convert episode lists for Wikidata",
(async()=>{
    console.clear();

    // QuickStatements v1 expects the columns of a command to be TAB-separated.
    const TAB = "\t";
    const statement = (...columns) => columns.map(String).join(TAB);
    // Wikilink targets are used as literal alternatives of a RegExp, so metacharacters
    // such as the parentheses of "Name (director)" have to be escaped.
    const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    // Fetches a wbgetentities URL and returns the first entity of the answer.
    const getEntity = async url => {
        const entityResponse = await GM.xmlHttpRequest({
            method: "GET",
            url,
            onload: function(response) {
                return response;
            }
        });
        return Object.values((JSON.parse(entityResponse.responseText)).entities)[0];
    };

    // Resolves a Wikipedia title (following redirects) to its Wikidata item id.
    // Never rejects: a failed or empty lookup is logged / yields undefined, so one
    // bad lookup cannot abort the whole run. Uses only local variables, so parallel
    // lookups cannot overwrite each other's response.
    const lookupWikibaseItem = async title => {
        try {
            const lookupResponse = await fetch(`/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${encodeURIComponent(title)}&format=json`);
            const lookupData = await lookupResponse.json();
            return Object.values(lookupData.query.pages)[0].pageprops?.wikibase_item;
        } catch (error) {
            console.log("ERROR: Wikidata lookup failed for", title, error);
            return undefined;
        }
    };

    // The title suffix is " - Wikipedia" or " – Wikipedia" depending on the interface language.
    const article = document.title.split(/ [-–—] Wikipedia/)[0];
    const pageResponse = await fetch(`/w/api.php?action=query&prop=revisions|pageprops&titles=${encodeURIComponent(article)}&rvslots=*&rvprop=content&formatversion=2&format=json`);
    const pageData = await pageResponse.json();
    const page = Object.values(pageData.query.pages)[0];
    // Everything from the first occurrence of "Specials" onwards is ignored.
    const articletext = page.revisions[0].slots.main.content.split("Specials")[0];
    const wikibaseId = page.pageprops.wikibase_item;

    const pageEntity = await getEntity(`https://www.wikidata.org/w/api.php?action=wbgetentities&props=claims&sitefilter=dewiki&ids=${wikibaseId}&format=json`);
    let seriesId;
    if (article.split("List of ").length==2){
        // list article: series taken from the P179 qualifier of P360
        seriesId = pageEntity.claims.P360[0].qualifiers.P179[0].datavalue.value.id;
    }else if (article.split("season").length==2){
        // season article: series taken from P179
        seriesId = pageEntity.claims.P179[0].mainsnak.datavalue.value.id;
    }else{
        seriesId = wikibaseId;
    }

    const seriesEntity = await getEntity(`https://www.wikidata.org/w/api.php?action=wbgetentities&props=sitelinks|claims|labels&sitefilter=dewiki&ids=${seriesId}&format=json`);
    const seriesEn = seriesEntity.labels.en.value;
    // German and Dutch descriptions fall back to the English series name.
    const series = seriesEntity.labels.de?.value ?? seriesEn;
    const seriesNl = seriesEntity.labels.nl?.value ?? seriesEn;
    const networkId = seriesEntity.claims.P449[0].mainsnak.datavalue.value.id;
    const imdbId = seriesEntity.claims.hasOwnProperty("P345")
        ? seriesEntity.claims.P345[0].mainsnak.datavalue.value
        : prompt("IMDb-Id");
    const fsId = seriesEntity.claims.hasOwnProperty("P5327")
        ? seriesEntity.claims.P5327[0].mainsnak.datavalue.value
        : prompt("Fernsehserien.de-Id");
    const originalLanguageId = seriesEntity.claims.P364[0].mainsnak.datavalue.value.id;
    const originalCountryId = seriesEntity.claims.P495[0].mainsnak.datavalue.value.id;

    // Season items (P527) ordered by their series ordinal (P1545).
    const ordinalOf = claim => claim.qualifiers.P1545[0].datavalue.value;
    const toItemId = claim => claim.mainsnak.datavalue.value.id;
    const seasonClaims = [...seriesEntity.claims.P527].sort((a,b) => ordinalOf(a) - ordinalOf(b));
    let seasons = seasonClaims.map(toItemId);
    if (location.href.includes("season")){
        // Accepts both "Title (season 2)" and "Title season 2"; keeps all seasons if no number is found.
        const seasonNumber = article.match(/season (\d+)/)?.[1];
        if (seasonNumber != null){
            seasons = seasonClaims.filter(claim => ordinalOf(claim)==Number(seasonNumber)).map(toItemId);
        }
    }

    // Parse the {{Episode list}} entries. `wikilinks` accumulates every link target seen
    // so far, so a person linked in an earlier entry is also recognised when unlinked later.
    let wikilinks = [];
    const episodes = articletext.split(/{{Episode list.*\n/).map(part => part.split(/\n}}\n/)[0]).slice(1).map(block => {
        wikilinks = wikilinks.concat([...block.matchAll(/\[\[(.*?)\]\]/g)].map(link => link[1].split("|")[0]).filter(target => target != ""));
        const linkPattern = new RegExp(wikilinks.map(escapeRegExp).join("|"),"g");
        const linkedNames = value => [...value.matchAll(linkPattern)].map(found => found[0]).filter(name => name != "");
        // Logs the problem and asks the user for the value of a parameter that could not be parsed.
        const askFor = field => {
            console.log(`ERROR: ${field}\n`,block);
            const line = block.match(new RegExp(field + ".*\n"));
            return prompt(`${field}\n${line}`) ?? "";
        };

        const airDate = block.match(/OriginalAirDate *= *(.+) *\n/);
        const directedBy = block.match(/DirectedBy_?1?2? *= *(.+) *\n/);
        const writtenBy = block.match(/WrittenBy_?1?2? *= *(.+) *\n/);
        if (airDate==null){console.log("ERROR: OriginalAirDate\n",block);}
        if (directedBy==null){console.log("ERROR: DirectedBy\n",block);}
        if (writtenBy==null){console.log("ERROR: WrittenBy\n",block);}

        return {
            "NR_GES": block.match(/EpisodeNumber *= *(\d+) *\n/)?.[1] ?? askFor("EpisodeNumber"),
            "NR_ST": block.match(/EpisodeNumber2 *= *(\d+) *\n/)?.[1] ?? askFor("EpisodeNumber2"),
            "OT": (block.match(/Title *= *(.+) *\n/)?.[1] ?? askFor("Title")).replace(/<!--.*?-->/i,""),
            // A missing value is reported above; the related statements are then left out.
            "EA": airDate ? getDate(airDate[1]) : "",
            "REG": directedBy ? linkedNames(directedBy[1]) : [],
            "DRB": writtenBy ? linkedNames(writtenBy[1]) : []
        };
    });

    // A drop of the per-season episode number marks the start of the next season.
    let seasonId = 0;
    let episodeId = 0;
    episodes.forEach(ep => {
        if (Number(ep.NR_ST)<episodeId){
            seasonId++;
        }
        ep.season = seasonId;
        episodeId = Number(ep.NR_ST);
    });

    console.log("getting Wikipedia articles from Wikidata…");
    const pendingLookups = Promise.all(episodes.map(ep => {
        ep.DRBid = [];
        ep.REGid = [];
        ep.OTid = "";
        // Target of a linked episode title, taken before the markup is removed.
        const linkedTitle = ep.OT.match(/\[\[(.*)\]\]/)?.[1].replace(/\|.*/,"");
        // Reduce the title to its display text so that label matching and output use plain text.
        ep.OT = ep.OT.replace(/\[\[/,"").replace(/\]\]/,"").trim();
        if (ep.OT.includes("|")){
            ep.OT = ep.OT.split(/\|/)[1];
        }
        return (async()=>{
            const [writerIds, directorIds, titleId] = await Promise.all([
                Promise.all(ep.DRB.map(name => lookupWikibaseItem(name))),
                Promise.all(ep.REG.map(name => lookupWikibaseItem(name))),
                linkedTitle == null ? undefined : lookupWikibaseItem(linkedTitle)
            ]);
            ep.DRBid = writerIds.filter(id => id != null);
            ep.REGid = directorIds.filter(id => id != null);
            ep.OTid = titleId ?? "";
        })();
    }));

    if (fsId){
        await GetFSLabels(fsId, episodes);
    }
    if (imdbId){
        await GetIMDbIds(imdbId, episodes);
    }
    // All item lookups have to be finished before the statements are written.
    await pendingLookups;

    // Write the QuickStatements commands.
    const wikipediaLink = location.href.split("#")[0];
    const source = ["S143", "Q328", "S4656", `"${wikipediaLink}"`];
    let output = "";
    episodes.forEach(ep => {
        // An episode whose title links to an existing item is updated instead of created.
        const isNewItem = ep.OTid == "";
        const subject = isNewItem ? "LAST" : ep.OTid;
        const lines = [];
        if (isNewItem){
            lines.push("CREATE");
        }
        lines.push(statement(subject, "Len", `"${ep.OT}"`));
        if (ep.hasOwnProperty("DT")){
            lines.push(statement(subject, "Lde", `"${ep.DT}"`));
        }
        lines.push(statement(subject, "P1476", `en:"${ep.OT}"`));
        if (isNewItem){
            // English and German descriptions are only written for new items.
            lines.push(statement(subject, "Den", `"episode of ${seriesEn}"`));
            lines.push(statement(subject, "Dde", `"Folge von ${series}"`));
        }
        lines.push(statement(subject, "Dnl", `"aflevering van ${seriesNl}"`));
        lines.push(statement(subject, "P31", "Q21191270", ...source));
        lines.push(statement(subject, "P179", seriesId, "P1545", `"${ep.NR_GES}"`, ...source));
        lines.push(statement(subject, "P4908", seasons[ep.season], "P1545", `"${ep.NR_ST}"`, ...source));
        lines.push(statement(subject, "P449", networkId, ...source));
        lines.push(statement(subject, "P364", originalLanguageId, ...source));
        lines.push(statement(subject, "P495", originalCountryId, ...source));
        if (ep.EA != ""){
            lines.push(statement(subject, "P577", `+${ep.EA}T00:00:00Z/11`, "P291", "Q30", ...source));
        }
        if (ep.hasOwnProperty("imdb")){
            lines.push(statement(subject, "P345", `"${ep.imdb}"`));
        }
        ep.REGid.forEach(reg => {
            lines.push(statement(subject, "P57", reg, ...source));
        });
        ep.DRBid.forEach(drb => {
            lines.push(statement(subject, "P58", drb, ...source));
        });
        output += lines.join("\n") + "\n";
    });
    console.log(output);
    GM.setClipboard(output);
}),"w");
/**
 * Converts the value of an `OriginalAirDate` parameter to `YYYY-MM-DD`.
 *
 * A `{{Start date|Y|M|D}}` template, optionally followed by further template
 * parameters such as `|df=y`, is replaced together with everything after it
 * (references, notes); text in front of the template is kept. Single-digit
 * month and day numbers are zero-padded. Input without such a template is
 * returned unchanged apart from the zero-padding.
 *
 * @param {string} episodeDate raw parameter value from the wikitext
 * @returns {string} the normalised date, or the (padded) input if no template was found
 */
function getDate(episodeDate){
    const startDateTemplate = /\{\{\s*Start date\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*(?:\|[^{}]*)?\}\}.*/i;
    return episodeDate
        .replace(startDateTemplate, "$1-$2-$3")
        // pad "-5" to "-05"; two-digit parts have no word boundary after their first digit
        .replace(/-(\d)\b/g, "-0$1");
}
/**
 * Reduces an episode title to a comparison key.
 *
 * The title is trimmed and lower-cased, a trailing "(Part N)" / "Part N" / "N"
 * is reduced to the bare number, the first "&" becomes "and", one leading
 * "the " or "a " is dropped, and spaces, zero-width characters and the listed
 * punctuation marks are removed.
 *
 * @param {string} title title to normalise
 * @returns {string} the comparison key
 */
function compareString(title){
    const partSuffix = /\(?(?:part)? ?(\d+?)\)?$/i;
    const leadingArticle = /^(?:the |a )/i;
    const ignoredCharacters = /[\u200B-\u200D\uFEFF .'’():,‚?!„“"‘…—–-]/g;

    let key = title.trim().toLowerCase();
    key = key.replace(partSuffix, "$1");
    // only the first ampersand is spelled out
    key = key.replace("&", "and");
    key = key.replace(leadingArticle, "");
    return key.replace(ignoredCharacters, "");
}
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other. Characters are compared by string index.
 *
 * @param {string} str1 first string
 * @param {string} str2 second string
 * @returns {number} the edit distance
 */
function levenshteinDistance(str1, str2){
    // Distances between the empty prefix of str2 and every prefix of str1.
    let previousRow = Array.from({length: str1.length + 1}, (_, column) => column);
    for (let row = 1; row <= str2.length; row++){
        // First cell: distance between a str2 prefix and the empty prefix of str1.
        const currentRow = [row];
        for (let column = 1; column <= str1.length; column++){
            const substitutionCost = str1[column - 1] === str2[row - 1] ? 0 : 1;
            const viaInsertion = currentRow[column - 1] + 1;
            const viaDeletion = previousRow[column] + 1;
            const viaSubstitution = previousRow[column - 1] + substitutionCost;
            currentRow.push(Math.min(viaInsertion, viaDeletion, viaSubstitution));
        }
        previousRow = currentRow;
    }
    return previousRow[str1.length];
}
/**
 * Adds the IMDb id of every episode as `ep.imdb`.
 *
 * The episode list of the series is read page by page from the IMDb title
 * search. Each episode is then matched by its normalised title
 * (`compareString`): a unique exact match wins, otherwise the entry with the
 * smallest Levenshtein distance is taken (the later entry on ties) and the
 * match is logged when the overall episode numbers differ.
 *
 * If IMDb returns no entries at all, an error is logged and the episodes are
 * left without `imdb` property.
 *
 * @param {string} imdbId IMDb id of the series
 * @param {Object[]} episodes episodes with `OT`, `NR_GES`, `NR_ST` and `season`; modified in place
 * @returns {Promise<void>}
 */
async function GetIMDbIds(imdbId, episodes){
    console.log("getting IDs from IMDb…");
    const pageSize = 250;
    const parser = new DOMParser();
    let imdbIds = [];
    let startEp = 1;
    let allEps = 0;
    let pageEntries = [];
    do{
        const response = await GM.xmlHttpRequest({
            method: "GET",
            url: `https://www.imdb.com/search/title/?series=${imdbId}&view=simple&sort=release_date,asc&count=${pageSize}&start=${startEp}`,
            onload: function(response) {
                return response;
            }
        });
        const xmlDoc = parser.parseFromString(response.responseText,"text/html");
        pageEntries = [...xmlDoc.querySelectorAll(".lister-item-header a:nth-child(5)")].map(link => ({
            "title": link.innerText,
            "id": link.href.split("/")[4],
            "nr": link.parentElement.parentElement.querySelector(".text-primary").innerText.replace(".","")
        }));
        imdbIds = imdbIds.concat(pageEntries);
        // Third word of the result summary is taken as the total — presumably "1-250 of 1,234 titles."; verify against the live page.
        const total = xmlDoc.querySelector(".desc>span")?.innerText.split(" ")[2] ?? "";
        allEps = Number(total.replace(/,/g,""));
        startEp += pageSize;
        // Stop on an empty page as well, otherwise a changed page layout would loop forever.
    } while (pageEntries.length > 0 && imdbIds.length < allEps);

    if (imdbIds.length == 0){
        console.log("ERROR: no episodes found on IMDb for", imdbId);
        return;
    }

    // Normalise every IMDb title once instead of once per comparison.
    const candidates = imdbIds.map(entry => ({entry, key: compareString(entry.title)}));
    episodes.forEach(ep => {
        const wanted = compareString(ep.OT);
        const exactMatches = candidates.filter(candidate => candidate.key == wanted);
        if (exactMatches.length == 1){
            ep.imdb = exactMatches[0].entry.id;
            return;
        }
        // No or ambiguous exact match: take the closest title, the later one on ties.
        let matchedEp = candidates[0].entry;
        let smallestDistance = levenshteinDistance(candidates[0].key, wanted);
        candidates.slice(1).forEach(candidate => {
            const distance = levenshteinDistance(candidate.key, wanted);
            if (distance <= smallestDistance){
                smallestDistance = distance;
                matchedEp = candidate.entry;
            }
        });
        if (ep.NR_GES != matchedEp.nr){
            console.log("fuzzy match:");
            console.log("OT: #" + ep.NR_GES + " / " + (ep.season+1)+"x"+(ep.NR_ST.length==1?"0":"") + ep.NR_ST, ep.OT);
            console.log("IMDb: #" + matchedEp.nr, matchedEp.title);
        }
        ep.imdb = matchedEp.id;
    });
}
/**
 * Adds the German title of every episode as `ep.DT`.
 *
 * The episode guide of the series is loaded from Fernsehserien.de and each
 * row yields a German title (`Lde`), an original title (`Len`), an overall
 * number (`nr`) and a season/episode code (`epNr`). Each episode is matched
 * by its normalised original title (`compareString`): a unique exact match
 * wins, otherwise the row with the smallest Levenshtein distance is taken
 * (the later row on ties) and the match is logged when the numbers differ.
 *
 * If the guide yields no rows, an error is logged and the episodes are left
 * without `DT` property.
 *
 * @param {string} fsId Fernsehserien.de id of the series
 * @param {Object[]} episodes episodes with `OT`, `NR_GES`, `NR_ST` and `season`; modified in place
 * @returns {Promise<void>}
 */
async function GetFSLabels(fsId, episodes){
    console.log("getting German labels from Fernsehserien.de…");
    const response = await GM.xmlHttpRequest({
        method: "GET",
        url: `https://www.fernsehserien.de/${fsId}/episodenguide`,
        onload: function(response) {
            return response;
        }
    });
    const xmlDoc = new DOMParser().parseFromString(response.responseText,"text/html");
    // A row lacking one of the elements yields an empty string instead of aborting the whole run.
    const fsLabels = [...xmlDoc.querySelectorAll("a[data-event-category=liste-episoden]")].map(row => ({
        "Lde": row.querySelector("div:nth-child(7)>span")?.innerText ?? "",
        "Len": row.querySelector("div:nth-child(7)>span.episodenliste-schmal")?.innerText ?? "",
        "nr": row.querySelector("div:nth-child(2)")?.firstChild?.nodeValue ?? "",
        "epNr": (row.querySelector("span:nth-child(1)")?.innerText ?? "").replace(".","x")
    }));
    console.log(fsLabels);

    if (fsLabels.length == 0){
        console.log("ERROR: no episodes found on Fernsehserien.de for", fsId);
        return;
    }

    // Normalise every original title once instead of once per comparison.
    const candidates = fsLabels.map(label => ({label, key: compareString(label.Len)}));
    episodes.forEach(ep => {
        const wanted = compareString(ep.OT);
        const exactMatches = candidates.filter(candidate => candidate.key == wanted);
        if (exactMatches.length == 1){
            ep.DT = exactMatches[0].label.Lde;
            return;
        }
        // No or ambiguous exact match: take the closest title, the later one on ties.
        let matchedEp = candidates[0].label;
        let smallestDistance = levenshteinDistance(candidates[0].key, wanted);
        candidates.slice(1).forEach(candidate => {
            const distance = levenshteinDistance(candidate.key, wanted);
            if (distance <= smallestDistance){
                smallestDistance = distance;
                matchedEp = candidate.label;
            }
        });
        const epNr = (ep.season+1) + "x" + (ep.NR_ST.length==1?"0":"") + ep.NR_ST;
        if (ep.NR_GES != matchedEp.nr || epNr != matchedEp.epNr){
            console.log("fuzzy match:");
            console.log("OT: #" + ep.NR_GES + " / " + epNr, ep.OT);
            console.log("FS: #" + matchedEp.nr + " / " + matchedEp.epNr, matchedEp.Len);
        }
        ep.DT = matchedEp.Lde;
    });
}
})();