// ==UserScript==
// @name Hakka Romanization Converter (HPF ⇄ PFS)
// @namespace hakka-romanization
// @version 1.1.1
// @description Display Si-yen Hakka Dictionary entries in Pha̍k-fa-sṳ!
// @author TongcyDai
// @match http*://hakkadict.moe.edu.tw/*
// @grant none
// @license GNU GPLv3
// ==/UserScript==
(function() {
'use strict';
// Romanization preference, persisted in localStorage under 'hakkaDictUsePFS'.
// Only the literal string 'false' turns PFS off; a missing key keeps the PFS default.
const savedPreference = localStorage.getItem('hakkaDictUsePFS');
let usePFS = savedPreference !== 'false';
// Original text / HTML of the DOM nodes touched by the conversion, keyed by node
const originalTextMap = new WeakMap();
// Build the floating switch button (fixed at the bottom-left corner) and attach it to <body>.
// The label names the system a click will switch TO; clicks are handled by toggleRomanization.
function createToggleButton() {
    const idleColor = '#3498db';
    const hoverColor = '#2980b9';

    const toggle = document.createElement('button');
    toggle.id = 'pfs-toggle-btn';
    toggle.textContent = usePFS ? '用拼音 (HPF)' : '用白話字 (PFS)';

    // Inline styles, applied in one go
    Object.assign(toggle.style, {
        position: 'fixed',
        bottom: '20px',
        left: '20px',
        zIndex: '9999',
        padding: '8px 12px',
        backgroundColor: idleColor,
        color: 'white',
        border: 'none',
        borderRadius: '4px',
        cursor: 'pointer',
        fontWeight: 'bold',
        boxShadow: '0 2px 5px rgba(0,0,0,0.2)',
        fontFamily: "'Microsoft JhengHei', 'Heiti TC', 'Heiti SC', 'STHeiti', 'WenQuanYi Zen Hei', sans-serif"
    });

    // Darken while hovered, return to the idle colour afterwards
    toggle.addEventListener('mouseover', () => {
        toggle.style.backgroundColor = hoverColor;
    });
    toggle.addEventListener('mouseout', () => {
        toggle.style.backgroundColor = idleColor;
    });

    // Clicking switches the romanization system
    toggle.addEventListener('click', toggleRomanization);

    document.body.appendChild(toggle);
}
// Flip the active romanization system, persist the choice, relabel the button
// and re-render the page in the newly selected system.
function toggleRomanization() {
    usePFS = !usePFS;
    localStorage.setItem('hakkaDictUsePFS', usePFS);

    // The label always names the system a click would switch to
    const toggle = document.getElementById('pfs-toggle-btn');
    if (toggle) {
        toggle.textContent = usePFS ? '用拼音 (HPF)' : '用白話字 (PFS)';
    }

    // PFS on -> convert the page; PFS off -> put the stored originals back
    const refreshPage = usePFS ? applyPFSTransformation : restoreOriginalText;
    refreshPage(document.body);
}
// Lookup table: Hakka Pinyin (HPF) initial -> PFS initial
const HPF_TO_PFS_INITIALS = {
    b: 'p',
    p: 'ph',
    d: 't',
    t: 'th',
    g: 'k',
    k: 'kh',
    z: 'ch',
    c: 'chh',
    j: 'ch',
    q: 'chh',
    x: 's'
};
// Convert a Hakka Pinyin initial to PFS.
// Initials that are not in the table are returned unchanged.
function convertInitial(initial) {
    const mapped = HPF_TO_PFS_INITIALS[initial];
    return mapped || initial;
}
// Convert a Hakka Pinyin syllable ending to PFS.
// Only the stop endings b/d/g are respelled (p/t/k); anything else is returned unchanged.
function convertEnding(ending) {
    const stopEndings = { b: 'p', d: 't', g: 'k' };
    const respelled = stopEndings[ending];
    return respelled ? respelled : ending;
}
// Convert the vowel part (nucleus) of a Hakka Pinyin syllable to PFS.
//   'ii'            -> 'ṳ'
//   'ua…' / 'ue…'   -> 'oa…' / 'oe…'  (ua -> oa, uai -> oai, ue -> oe)
// Any other nucleus is returned unchanged.
// The u -> o rewrite is applied to every nucleus that STARTS with ua/ue, not only to the
// exact strings 'ua' and 'ue'; otherwise 'uai' would stay 'uai' and never reach the
// 'oa…' tone-placement rule in findToneTarget.
function convertNucleusMain(nucleus) {
    if (nucleus === 'ii') return 'ṳ';
    if (nucleus.startsWith('ua') || nucleus.startsWith('ue')) {
        return 'o' + nucleus.substring(1);
    }
    return nucleus;
}
// Decide which letter of the (already converted) nucleus carries the tone mark.
// Returns { index, char }: the position inside nucleusMain and the letter found there,
// or { index: -1, char: '' } when nothing suitable exists.
function findToneTarget(nucleusMain) {
    // Nuclei that begin with 'oa' (oa, oai, ...) always mark the leading 'o'
    if (nucleusMain.startsWith('oa')) {
        return { index: 0, char: 'o' };
    }

    // Otherwise the highest-ranked vowel present wins: a > o > e > u > ṳ > i
    const ranking = ['a', 'o', 'e', 'u', 'ṳ', 'i'];
    const winner = ranking.find((vowel) => nucleusMain.indexOf(vowel) !== -1);
    if (winner !== undefined) {
        return { index: nucleusMain.indexOf(winner), char: winner };
    }

    // Syllabic consonants: m and n mark themselves, ng marks its 'n'
    const isSyllabic = nucleusMain === 'm' || nucleusMain === 'n' || nucleusMain === 'ng';
    if (isSyllabic) {
        return { index: 0, char: nucleusMain.charAt(0) };
    }

    // Nothing to attach a mark to
    return { index: -1, char: '' };
}
// Insert the combining tone mark for toneType right after the target letter.
//   syllable      - PFS syllable without tone mark (initial + nucleus + ending)
//   toneTarget    - { index, char } from findToneTarget, index relative to the nucleus
//   toneType      - '1', '2', '5' or '8' get a mark; any other value leaves the syllable as is
//   initialLength - length of the converted initial, used to shift index into the full syllable
// Returns the syllable with the combining character inserted (not yet NFC-normalized).
function applyToneMark(syllable, toneTarget, toneType, initialLength) {
    if (toneTarget.index === -1) {
        return syllable;
    }

    const combiningMarks = {
        '1': '\u0302', // circumflex
        '2': '\u0301', // acute
        '5': '\u0300', // grave
        '8': '\u030D'  // vertical line above
    };
    const mark = combiningMarks[toneType];
    if (!mark) {
        // e.g. 3rd and 4th tones are written without a mark
        return syllable;
    }

    // Position of the target letter within the whole syllable
    const position = initialLength + toneTarget.index;
    const head = syllable.substring(0, position);
    const marked = syllable.charAt(position) + mark;
    const tail = syllable.substring(position + 1);

    // For a syllabic 'ng' the target is its 'n', so the same splice yields n + mark + g
    return head + marked + tail;
}
// Respell a nucleus that begins with 'i' (used by convertSyllable for syllables with no initial):
// a bare 'i' becomes 'yi', any longer 'i…' has its leading 'i' replaced by 'y' (iu -> yu, ia -> ya).
// A nucleus that does not start with 'i' is returned unchanged.
function handleInitialI(nucleusMain) {
    if (nucleusMain.charAt(0) !== 'i') {
        return nucleusMain;
    }
    const rest = nucleusMain.substring(1);
    return rest === '' ? 'yi' : 'y' + rest;
}
// Convert one Hakka Pinyin (HPF) syllable, tone symbol included, to PFS.
// Input that does not look like an HPF syllable is returned unchanged.
// The result still contains separate combining marks; NFC normalization is left to the caller.
function convertSyllable(hpf) {
    // Syllables carrying a '+' or '^' tone symbol are not converted: the symbol is
    // stripped from wherever it occurs and a single copy is put back at the end.
    if (hpf.includes('+')) {
        return hpf.replace(/\+/g, '') + '+';
    }
    if (hpf.includes('^')) {
        return hpf.replace(/\^/g, '') + '^';
    }

    // --- Syllabic consonants (no vowel at all): (initial)? + m / n / ng + (tone)? ---
    if (!/[aeiou]/.test(hpf)) {
        const syllabic = /^([bpmfdtnlgkhvzcsjqx]{1,2})?(m|n|ng)([ˊˋˇ])?$/.exec(hpf);
        if (syllabic) {
            const [, onset, consonant, toneSymbol] = syllabic;
            const pfsOnset = onset ? convertInitial(onset) : '';
            // ˊ -> circumflex, ˋ -> acute, ˇ -> grave
            const syllabicMarks = { 'ˊ': '\u0302', 'ˋ': '\u0301', 'ˇ': '\u0300' };
            const mark = toneSymbol ? syllabicMarks[toneSymbol] : '';
            if (!mark) {
                return pfsOnset + consonant;
            }
            // For 'ng' the mark sits on the 'n'
            return consonant === 'ng'
                ? pfsOnset + 'n' + mark + 'g'
                : pfsOnset + consonant + mark;
        }
        // Not a recognised syllabic form: fall through, the regular pattern below rejects it too
    }

    // --- Regular syllables: (initial)?(vowels)(ending)?(tone)? ---
    const parts = /^([bpmfdtnlgkhvzcsjqx]{1,2})?([aeiou]+)([bdgmn]|ng)?([ˊˋˇ])?$/.exec(hpf);
    if (!parts) {
        return hpf;
    }
    const [, initial, nucleusMain, ending, tone] = parts;

    const pfsInitial = initial ? convertInitial(initial) : '';

    // Vowel part; a leading 'i' is respelled with 'y' when there is no initial
    let pfsNucleus = convertNucleusMain(nucleusMain);
    if (!initial && pfsNucleus.charAt(0) === 'i') {
        pfsNucleus = handleInitialI(pfsNucleus);
    }

    const pfsEnding = ending ? convertEnding(ending) : '';

    // Tone number: the same HPF symbol means a different tone on syllables ending in b/d/g
    const hasStopEnding = ending === 'b' || ending === 'd' || ending === 'g';
    let toneType = '';
    switch (tone) {
        case 'ˊ':
            toneType = '1';
            break;
        case 'ˋ':
            toneType = hasStopEnding ? '4' : '2'; // 4th tone is written without a mark
            break;
        case 'ˇ':
            toneType = '5';
            break;
        default:
            if (!tone) {
                toneType = hasStopEnding ? '8' : '3'; // 3rd tone is written without a mark
            }
    }

    // The tone target is located inside the nucleus, so the initial's length is passed as offset
    const toneTarget = findToneTarget(pfsNucleus);
    return applyToneMark(pfsInitial + pfsNucleus + pfsEnding, toneTarget, toneType, pfsInitial.length);
}
// Convert a whole run of Hakka Pinyin text (space-separated syllables, optional punctuation)
// to PFS.
//   - literal <sup>…</sup> wrappers in the text are dropped, keeping their content
//   - full-width punctuation (,、。;:?!) is replaced by its half-width counterpart
//   - syllables between punctuation marks are converted one by one and joined with hyphens
//   - every punctuation mark is followed by exactly ONE space; no trailing space is emitted
//   - a single leading space of the input is preserved
// Returns the converted text, NFC-normalized so letter + combining mark pairs are composed
// where a precomposed character exists.
function convertHPFToPFS(text) {
    const leadingSpace = text.startsWith(' ') ? ' ' : '';

    // Full-width -> half-width punctuation (written as escapes so the table is unambiguous)
    const halfWidth = {
        '\uFF0C': ',', // ,
        '\u3001': ',', // 、
        '\u3002': '.', // 。
        '\uFF1B': ';', // ;
        '\uFF1A': ':', // :
        '\uFF1F': '?', // ?
        '\uFF01': '!'  // !
    };
    const normalized = text
        .replace(/<sup>([^<]*)<\/sup>/g, '$1')
        .replace(/[\uFF0C\u3001\u3002\uFF1B\uFF1A\uFF1F\uFF01]/g, (mark) => halfWidth[mark])
        .trim();

    // Convert one punctuation-free stretch of text: "a b c" -> "A-B-C"
    const convertSegment = (segment) => {
        const trimmed = segment.trim();
        if (!trimmed) {
            return '';
        }
        return trimmed.split(/\s+/).map((syllable) => convertSyllable(syllable)).join('-');
    };

    // Splitting on a capturing group keeps the separators: even slots are text
    // segments, odd slots are the punctuation marks between them.
    const pieces = normalized.split(/([,.:;?!])/);
    const rebuilt = pieces
        .map((piece, slot) => (slot % 2 === 1 ? piece + ' ' : convertSegment(piece)))
        .join('')
        .trim(); // drops the space after a final punctuation mark

    return leadingSpace + rebuilt.normalize('NFC');
}
// Remember a node's original content the first time it is seen.
// readOriginal is only invoked when nothing has been stored for the node yet.
function rememberOriginal(target, readOriginal) {
    if (!originalTextMap.has(target)) {
        originalTextMap.set(target, readOriginal());
    }
}

// Hailu blocks are not converted; only a '+' tone marker inside <sup> is swapped for '˖'.
function markHailuPlusTones(node) {
    rememberOriginal(node, () => node.innerHTML);
    const supElements = node.querySelectorAll('sup');
    for (let i = 0; i < supElements.length; i++) {
        const sup = supElements[i];
        rememberOriginal(sup, () => sup.innerHTML);
        if (sup.textContent === '+') {
            sup.textContent = '˖';
        }
    }
}

// Convert the pinyin held in <span> descendants of an accent block.
// Each span's text content (tone <sup> text included) is converted as a whole and written back
// as plain text. The span matched by 'span:first-child' (usually the dialect name) is left alone.
function convertInnerSpans(node) {
    const spanElements = node.querySelectorAll('span');
    for (let i = 0; i < spanElements.length; i++) {
        const span = spanElements[i];
        const spanText = span.textContent;
        rememberOriginal(span, () => span.innerHTML);
        if (!/[a-z]/i.test(spanText)) {
            continue;
        }
        if (span === node.querySelector('span:first-child')) {
            continue;
        }
        span.textContent = convertHPFToPFS(spanText);
    }
}

// Convert pinyin stored directly under an accent block as "text node + <sup>tone</sup>" runs.
// All runs are joined with spaces and converted together; the result is written into the text
// node of the first run, the tone <sup> elements are hidden and later text nodes are emptied.
function convertDirectPinyin(node) {
    const runs = [];
    let pendingText = '';
    let awaitingTone = false;

    // Close the run collected so far when it ends without a tone <sup>
    const closeRunWithoutTone = (anchorNode) => {
        runs.push({ text: pendingText, node: anchorNode });
        pendingText = '';
        awaitingTone = false;
    };

    const children = Array.from(node.childNodes);
    for (const child of children) {
        const tagName = child.nodeName.toLowerCase();
        if (tagName === 'span') {
            continue; // spans are handled by convertInnerSpans
        }

        if (child.nodeType === Node.TEXT_NODE) {
            const text = child.nodeValue;
            rememberOriginal(child, () => text);
            if (!text.trim()) {
                continue; // whitespace only
            }
            if (/[a-z]/i.test(text)) {
                pendingText += text;
                awaitingTone = true;
            } else if (awaitingTone && pendingText) {
                // Non-pinyin text ends the current run
                closeRunWithoutTone(child.previousSibling);
            }
        } else if (tagName === 'sup') {
            if (!awaitingTone) {
                continue;
            }
            rememberOriginal(child, () => child.innerHTML);
            // The sibling just before the <sup> is taken to be the text node holding the letters
            runs.push({
                text: pendingText + child.textContent,
                node: child.previousSibling,
                supNode: child
            });
            pendingText = '';
            awaitingTone = false;
        } else if (awaitingTone && pendingText) {
            // Any other element (link, line break, ...) also ends the current run
            closeRunWithoutTone(child.previousSibling);
        }
    }

    // Trailing letters that were never followed by a tone
    if (awaitingTone && pendingText) {
        runs.push({ text: pendingText, node: children[children.length - 1] });
    }

    if (runs.length === 0) {
        return;
    }
    const fullPinyinText = runs.map((run) => run.text).join(' ');
    if (!/[a-z]/i.test(fullPinyinText)) {
        return;
    }
    const convertedText = convertHPFToPFS(fullPinyinText);

    const firstTextNode = runs[0].node;
    if (!firstTextNode || firstTextNode.nodeType !== Node.TEXT_NODE) {
        return;
    }
    firstTextNode.nodeValue = convertedText;
    runs.forEach((run, position) => {
        if (run.supNode) {
            run.supNode.style.display = 'none';
        }
        if (position > 0 && run.node && run.node.nodeType === Node.TEXT_NODE) {
            run.node.nodeValue = '';
        }
    });
}

// Walk the DOM below (and including) node and rewrite Hakka romanization blocks.
// Only elements with class 'accent-data' and data-type="contour" are touched:
//   - accent id '2', or located in the Hailu tab: '+' tone markers become '˖'
//   - accent id '1' / '6', or no accent id but located in a Si-yen / Nam-si-yen tab:
//     the pinyin is converted to PFS and the element gets the class 'pfs-converted'
// Original content of every rewritten node is kept in originalTextMap so that
// restoreOriginalText can undo the change.
function applyPFSTransformation(node) {
    // Text outside romanization blocks is never rewritten, so it is neither converted nor
    // snapshotted here; a snapshot would only let a later restore overwrite page text the
    // script never changed.
    if (node.nodeType !== Node.ELEMENT_NODE) {
        return;
    }

    const isAccentBlock = node.classList && node.classList.contains('accent-data');
    if (!isAccentBlock) {
        // Ordinary element: recurse into it, except for our own toggle button
        if (node.id !== 'pfs-toggle-btn') {
            for (let i = 0; i < node.childNodes.length; i++) {
                applyPFSTransformation(node.childNodes[i]);
            }
        }
        return;
    }

    if (node.getAttribute('data-type') !== 'contour') {
        return;
    }
    const accentId = node.getAttribute('data-accent-id');

    // Hailu is checked first, exactly as before
    if (accentId === '2' || isInHailuTab(node)) {
        markHailuPlusTones(node);
        return;
    }

    const isTargetDialect =
        accentId === '1' || accentId === '6' || (!accentId && isInSiyenOrNamSiyenTab(node));
    if (!isTargetDialect) {
        return;
    }

    // Already converted (e.g. the same block reached twice within one batch of mutation
    // records): converting PFS output again would corrupt it ('ba' -> 'pa' -> 'pha').
    // restoreOriginalText removes the class, so toggling back and forth still works.
    if (node.classList.contains('pfs-converted')) {
        return;
    }
    node.classList.add('pfs-converted');
    rememberOriginal(node, () => node.innerHTML);

    convertInnerSpans(node);
    convertDirectPinyin(node);
}
// Tell whether node sits inside a Si-yen or Nam-si-yen tab.
// Climbs from node towards <body> (body itself is not inspected); the first ancestor-or-self
// carrying the class 'tab-pane' decides: id 'item1' (Si-yen) or 'item6' (Nam-si-yen) -> true.
// Returns false when no tab pane is found.
function isInSiyenOrNamSiyenTab(node) {
    for (let el = node; el && el !== document.body; el = el.parentNode) {
        const isTabPane = el.classList && el.classList.contains('tab-pane');
        if (isTabPane) {
            return el.id === 'item1' || el.id === 'item6';
        }
    }
    return false;
}
// Tell whether node sits inside the Hailu tab.
// Climbs from node towards <body> (body itself is not inspected); the first ancestor-or-self
// carrying the class 'tab-pane' decides: id 'item2' (Hailu) -> true.
// Returns false when no tab pane is found.
function isInHailuTab(node) {
    let cursor = node;
    while (cursor && cursor !== document.body) {
        const classes = cursor.classList;
        if (classes && classes.contains('tab-pane')) {
            return cursor.id === 'item2';
        }
        cursor = cursor.parentNode;
    }
    return false;
}
// Undo the conversion below (and including) node using the content saved in originalTextMap.
//   - text node: its saved text is written back, if any
//   - 'accent-data' element: loses the 'pfs-converted' class; when its whole HTML was saved it
//     is restored in one step, otherwise saved children / span / sup elements are restored
//     individually
//   - any other element: recursed into, except for the toggle button
function restoreOriginalText(node) {
    if (node.nodeType === Node.TEXT_NODE) {
        if (originalTextMap.has(node)) {
            node.nodeValue = originalTextMap.get(node);
        }
        return;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
        return;
    }

    const isAccentBlock = node.classList && node.classList.contains('accent-data');
    if (!isAccentBlock) {
        // Never touch our own toggle button
        if (node.id === 'pfs-toggle-btn') {
            return;
        }
        for (let i = 0; i < node.childNodes.length; i++) {
            restoreOriginalText(node.childNodes[i]);
        }
        return;
    }

    node.classList.remove('pfs-converted');

    // Whole-element snapshot available: restoring it also restores every descendant
    if (originalTextMap.has(node)) {
        node.innerHTML = originalTextMap.get(node);
        return;
    }

    // No snapshot of the element itself: restore saved direct children one by one
    for (const child of Array.from(node.childNodes)) {
        if (!originalTextMap.has(child)) {
            continue;
        }
        const saved = originalTextMap.get(child);
        const tagName = child.nodeName.toLowerCase();
        if (child.nodeType === Node.TEXT_NODE) {
            child.nodeValue = saved;
        } else if (tagName === 'span') {
            child.innerHTML = saved;
        } else if (tagName === 'sup') {
            child.innerHTML = saved;
            child.style.display = ''; // make a hidden tone mark visible again
        }
    }

    // ...and any saved span / sup anywhere inside the element
    const descendants = node.querySelectorAll('span, sup');
    for (let i = 0; i < descendants.length; i++) {
        const element = descendants[i];
        if (originalTextMap.has(element)) {
            element.innerHTML = originalTextMap.get(element);
            element.style.display = '';
        }
    }
}
// Watch <body> for nodes added later (dynamically loaded content) and convert them
// while PFS display is switched on. The toggle button itself is never processed.
function setupMutationObserver() {
    const handleMutations = (records) => {
        for (const record of records) {
            const added = record.addedNodes;
            if (!(added && added.length > 0)) {
                continue;
            }
            for (let i = 0; i < added.length; i++) {
                const node = added[i];
                if (node.id === 'pfs-toggle-btn') {
                    continue;
                }
                // usePFS is read per node, so the current setting always applies
                if (usePFS) {
                    applyPFSTransformation(node);
                }
            }
        }
    };

    const watcher = new MutationObserver(handleMutations);
    watcher.observe(document.body, { childList: true, subtree: true });
}
// Inject a <style id="pfs-font-style"> element into <head> that gives converted romanization
// (inside .accent-data.pfs-converted) a font stack with 'letter-spacing: 0.5px', while the
// first span and .han-char elements keep their inherited font.
function setupDiacriticFontSupport() {
    const css = `
/* Target only the romanization text, not the hanzi */
.accent-data.pfs-converted > span:not(:first-child),
.accent-data.pfs-converted > :not(span):not(.han-char) {
font-family: 'Gentium Plus', 'Doulos SIL', 'Charis SIL', 'Noto Sans', 'DejaVu Sans', 'Lucida Sans Unicode', sans-serif !important;
letter-spacing: 0.5px;
}
/* Make sure hanzi characters keep their original font */
.accent-data.pfs-converted .han-char,
.accent-data.pfs-converted > span:first-child {
font-family: inherit;
}
`;
    const sheet = document.createElement('style');
    sheet.id = 'pfs-font-style';
    sheet.textContent = css;
    document.head.appendChild(sheet);
}
// One-time start-up: add the toggle button, the font stylesheet and the mutation observer,
// then convert the page if PFS is the selected system.
function initializeConverter() {
    createToggleButton();
    setupDiacriticFontSupport();
    setupMutationObserver();
    // Decide initial display based on user's choice
    if (usePFS) {
        applyPFSTransformation(document.body);
    }
}
// Initialize once the page has finished loading.
// A userscript may be injected after the window 'load' event has already fired; in that case
// a 'load' listener would never run, so start immediately when the document is complete.
if (document.readyState === 'complete') {
    initializeConverter();
} else {
    window.addEventListener('load', initializeConverter, { once: true });
}
})();