// ==UserScript==
// @name Webpage to Markdown
// @namespace http://tampermonkey.net/
// @version 2.0
// @description Enhanced webpage to Markdown converter with advanced content detection, multi-platform support, and intelligent filtering. Significantly improved content selection, forced conversion capabilities, and unwanted element recognition.
// @author Feiyt
// @homepageURL https://github.com/Feiyt
// @license MIT
// @match *://*/*
// @require https://unpkg.com/turndown/dist/turndown.js
// @require https://unpkg.com/turndown-plugin-gfm@1.0.2/dist/turndown-plugin-gfm.js
// @grant GM_registerMenuCommand
// @grant GM_download
// @grant GM_addStyle
// @grant GM_setValue
// @grant GM_getValue
// @run-at document-idle
// ==/UserScript==
// Copyright (c) 2025 Feiyt
// Released under the MIT license
// https://github.com/Feiyt (or specify the exact repo if available)
(function() {
'use strict';
console.log("Enhanced Webpage to Markdown (v2.0) script starting..."); // Version updated
// --- Configuration ---
// Options handed to the TurndownService constructor. The three *Replacement
// callbacks decide what is emitted for blank nodes, kept (raw HTML) nodes and
// nodes without a dedicated rule: block-level output is fenced by blank lines,
// inline output is passed through untouched.
const turndownOptions = (() => {
    const BLOCK_GAP = '\n\n';

    // Surround `text` with blank lines when `node` is block-level.
    const padIfBlock = (node, text) => {
        if (node.isBlock) {
            return BLOCK_GAP + text + BLOCK_GAP;
        }
        return text;
    };

    return {
        headingStyle: 'atx',
        hr: '---',
        bulletListMarker: '*',
        codeBlockStyle: 'fenced',
        emDelimiter: '*',
        strongDelimiter: '**',
        linkStyle: 'inlined',
        linkReferenceStyle: 'full',
        preformattedCode: false,
        // A blank block still has to separate its neighbours; a blank inline node vanishes.
        blankReplacement(content, node) {
            if (node.isBlock) {
                return BLOCK_GAP;
            }
            return '';
        },
        // Kept nodes are emitted as their raw HTML.
        keepReplacement(content, node) {
            return padIfBlock(node, node.outerHTML);
        },
        // Nodes without a matching rule contribute only their converted children.
        defaultReplacement(content, node) {
            return padIfBlock(node, content);
        }
    };
})();
// --- Helper Functions ---
/**
 * Turns an arbitrary string (typically the page title) into a safe file name.
 *
 * Characters that are forbidden or troublesome in file names
 * (/ \ : * ? " < > | # % and line breaks) become underscores, runs of
 * whitespace collapse to a single space, and the result is trimmed.
 *
 * @param {*} name - Desired file name; null/undefined are treated as empty,
 *   other non-string values are stringified.
 * @returns {string} The sanitized name, or "markdown_export" when nothing usable remains.
 */
function sanitizeFilename(name) {
    // `== null` deliberately matches both null and undefined.
    const raw = name == null ? '' : String(name);
    const cleaned = raw
        .replace(/[\/\\:*?"<>|#%\n\r]/g, '_')
        .replace(/\s+/g, ' ')
        .trim();
    return cleaned || "markdown_export";
}
/**
 * Finds the element most likely to hold the page's main content, clones it,
 * and strips non-content elements (navigation, ads, forms, ...) from the clone.
 * Only clones are modified; the live document is left untouched.
 *
 * Selection: each selector in the priority list is queried once. Its first
 * match is scored (list position plus text/structure bonuses and penalties)
 * and the highest score wins. When no candidate scores >= 0 the function falls
 * back to a copy of <body> with obvious chrome removed, then to the element
 * holding the most text, then to <body> itself.
 *
 * Cleaning runs in three phases: unconditional removal of unambiguous
 * non-content, guarded removal of heuristic matches (text-rich matches are
 * kept), and removal of empty leaf elements. If cleaning leaves almost
 * nothing, a second clone is cleaned with a much smaller exclusion list.
 *
 * @returns {object|null} Object containing { title: string, contentNode: Node },
 *   or null when the chosen container cannot be cloned or cleaning throws.
 */
function getPageContentNode() {
    console.log("getPageContentNode (v2.0 enhanced logic): Starting content retrieval...");
    const pageTitle = document.title || window.location.hostname;
    let bestCandidate = null;
    let bestSelector = null; // Selector that produced bestCandidate (for logging)
    let maxScore = -1; // Simple scoring mechanism

    // Selectors in priority order: earlier entries get a higher base score.
    const selectors = [
        // Highest Priority: Semantic & Specific Roles/IDs/Classes
        'article', '[role="article"]', '.article-body', '.post-content', '.entry-content', '#article-content', '.post-body', '.markdown-body',
        // High Priority: Main content areas
        'main', '[role="main"]', '#main-content', '#main', '.main-content', '.main', '#primary',
        // Medium Priority: Common generic containers (often need cleaning)
        '#content', '.content',
        // Lower Priority: More specific layout patterns
        '#page .content',
        '.container .content',
        // Stack Overflow Example
        '#mainbar',
    ];
    // Extended selectors covering a wider range of platforms.
    const enhancedSelectors = [
        // Generic content containers
        '.content-area', '.post-article', '.blog-post', '.entry', '.single-post',
        '.article-content', '.story-content', '.news-content', '.page-content',
        '.main-article', '.primary-content', '.main-body', '.content-wrapper',
        '.post-wrapper', '.article-wrapper', '.entry-wrapper',
        // Blog platforms
        '.hentry', '.post', '.article', '.blog-entry', '.content-post',
        '.entry-content-wrap', '.post-content-wrap', '.article-body-wrap',
        // Social media and forums
        '.twitter-tweet', '.fb-post', '.linkedin-post', '.reddit-post',
        '.discourse-post', '.discourse-post-stream',
        // Knowledge platforms
        '.zhihu-content', '.zhihu-post', '.zhihu-answer',
        '.notion-page-content', '.notion-selectable',
        '.medium-article', '.medium-content', '.postArticle-content',
        '.quora-answer', '.stackoverflow-post', '#answers .answer',
        '.wiki-content', '.mediawiki-content', '.mw-parser-output',
        // News sites
        '.news-article', '.article-text', '.story-body', '.story-content',
        '.news-content', '.article-body-text', '.paragraph-content',
        '.content-body', '.text-content', '.full-content',
        // Technical documentation and tutorials
        '.documentation', '.docs-content', '.tutorial-content',
        '.guide-content', '.manual-content', '.readme-content',
        '.markdown-content', '.rst-content', '.asciidoc-content',
        // E-commerce and product pages
        '.product-description', '.product-details', '.item-description',
        '.listing-description', '.product-content',
        // Academic and journals
        '.abstract', '.paper-content', '.journal-content', '.academic-content',
        '.citation-content', '.research-content',
        // CMS-specific
        '.wordpress-content', '.drupal-content', '.joomla-content',
        '.contentful-content', '.strapi-content', '.ghost-content',
        // Mobile layouts
        '.mobile-content', '.responsive-content', '.adaptive-content',
        // Generic semantic selectors
        '[role="document"]', '[role="article"]', '[role="main"]',
        '[itemtype*="Article"]', '[itemtype*="BlogPosting"]',
        '.text', '.copy', '.body-text', '.article-text'
    ];
    selectors.push(...enhancedSelectors);

    selectors.forEach((selector, index) => {
        try {
            const element = document.querySelector(selector);
            if (!element) return;
            // Base score from list position, then adjust by content quality.
            let score = selectors.length - index;
            const textLength = element.textContent?.trim().length || 0;
            const childCount = element.childElementCount || 0;
            const linkCount = element.querySelectorAll('a').length || 0;
            const paragraphCount = element.querySelectorAll('p').length || 0;
            const headingCount = element.querySelectorAll('h1,h2,h3,h4,h5,h6').length || 0;
            // Bonuses
            if (textLength > 500) score += 2; // Rich content
            if (paragraphCount > 3) score += 1; // Good paragraph structure
            if (headingCount > 0) score += 1; // Has headings
            if (textLength / Math.max(linkCount, 1) > 50) score += 1; // Reasonable text-to-link ratio
            // Penalties
            if (textLength < 100) score -= 3; // Too little content
            if (childCount < 2 && textLength < 200) score -= 2; // Trivial structure and little content
            if (linkCount > textLength / 10) score -= 1; // Link-heavy, possibly navigation
            // Containers that wrap page chrome are less likely to be the article itself.
            if (element.querySelector('nav, .nav, .navigation')) score -= 2;
            if (element.querySelector('footer, .footer')) score -= 1;
            if (element.querySelector('.sidebar, .widget')) score -= 1;
            console.log(`Found candidate [${selector}] with enhanced score ${score} (text: ${textLength}, children: ${childCount}, paragraphs: ${paragraphCount})`);
            if (score > maxScore) {
                maxScore = score;
                bestCandidate = element;
                bestSelector = selector;
                console.log(`>>> New best candidate: [${selector}] with score ${score}`);
            }
        } catch (e) { console.warn(`Error querying selector "${selector}": ${e.message}`); }
    });

    // If no good candidate found via specific selectors, use body as last resort
    if (!bestCandidate || maxScore < 0) {
        console.warn("No suitable specific container found after checking selectors. Attempting fallback strategies...");
        // Fallback 1: a copy of <body> with obvious non-content regions removed.
        const bodyClone = document.body.cloneNode(true);
        const obviousNonContent = [
            'header', 'nav', '.header', '.nav', '.navigation', '.navbar', '.menu',
            'footer', '.footer', 'aside', '.sidebar', '.widget-area'
        ];
        obviousNonContent.forEach(sel => {
            try {
                bodyClone.querySelectorAll(sel).forEach(el => el.remove());
            } catch (e) { console.warn(`Error removing fallback selector "${sel}": ${e.message}`); }
        });
        const bodyTextLength = bodyClone.textContent?.trim().length || 0;
        if (bodyTextLength > 200) {
            console.log("Using processed body as fallback with text length:", bodyTextLength);
            bestCandidate = bodyClone;
        } else {
            // Fallback 2: the single container element holding the most text.
            console.log("Attempting to find element with most text content...");
            let maxTextElement = null;
            let maxTextLength = 0;
            document.querySelectorAll('div, section, article, main').forEach(el => {
                const textLen = el.textContent?.trim().length || 0;
                if (textLen > maxTextLength && textLen > 100) {
                    maxTextLength = textLen;
                    maxTextElement = el;
                }
            });
            if (maxTextElement) {
                console.log(`Found element with most text (${maxTextLength} chars), using as fallback.`);
                bestCandidate = maxTextElement;
            } else {
                // Fallback 3: the untouched body.
                console.warn("All fallback strategies failed. Using document.body as absolute last resort.");
                bestCandidate = document.body;
            }
        }
    } else {
        console.log(`Selected final container: <${bestCandidate.tagName.toLowerCase()}> (Selector: ${bestSelector})`);
    }

    // --- Clone and Clean ---
    try {
        if (!bestCandidate || typeof bestCandidate.cloneNode !== 'function') {
            console.error("Cannot clone the selected content element."); return null;
        }
        console.log("Cloning selected container...");
        const clone = bestCandidate.cloneNode(true);

        // Selectors for elements to exclude from the conversion.
        const excludeSelectors = [
            // Page structure
            'header', 'footer', 'nav', '.header', '.footer', '.navbar', '.menu', '.toc', '#toc',
            '.breadcrumb', '#breadcrumb', '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
            'aside', '.sidebar', '#sidebar', '.widget-area', '#secondary', '.left-column', '.right-column',
            '[role="complementary"]',
            // Interaction and action elements
            '.actions', '.share', '.social', '.buttons', '.post-meta', '.entry-meta',
            '.feedback', '.related-posts', '.like-button-container', '.feedback-container',
            '.edit-link', '.print-link', '[role="search"]', '.search', '.search-form',
            '.login', '.register', '.signup', '.signin', '.auth-form',
            // Comment systems
            '#comments', '.comments', '.comment-section', '#respond', '.disqus',
            '.livefyre', '.facebook-comments', '.giscus', '.utterances',
            // Ads and promotions
            '.ad', '.ads', '.advertisement', '.adsbygoogle', '[id*="ad-"]', '[class*="ad-"]',
            '[class*="advert"]', '.sponsored', '.promoted', '.promo', '.banner-ad',
            '.google-ad', '.adsense', '.doubleclick', '.outbrain', '.taboola',
            // Popups and modals
            '.popup', '.modal', '.overlay', '.lightbox', '.dialog', '.tooltip',
            '.cookie-banner', '.cookie-consent', '.tracking-consent', '.gdpr-notice',
            '.newsletter-popup', '.subscription-modal', '.survey', '.feedback-form',
            // Technical elements
            'script', 'style', 'noscript', 'template', 'link[rel="stylesheet"]', 'meta',
            'input[type="hidden"]', '.visually-hidden', '.sr-only', '[aria-hidden="true"]',
            '.hidden', '.invisible', '.offscreen', 'iframe[src*="ads"]', 'iframe[src*="tracking"]',
            // Related and recommended content
            '.related-articles', '#related-articles', '.related_posts', '.related-content',
            '.recommended', '.suggestions', '.more-stories', '.you-might-like',
            '.trending', '.popular', '.most-read', '.external-links',
            // Social media embed containers
            '.twitter-embed', '.facebook-embed', '.instagram-embed', '.youtube-embed',
            '.social-embed', '.embed-wrapper', '.iframe-wrapper',
            // Platform-specific elements
            '.medium-footer', '.medium-clap', '.medium-highlight-menu',
            '.notion-sidebar', '.notion-topbar', '.notion-collection-view-item',
            '.zhihu-ad', '.zhihu-recommend', '.zhihu-footer',
            '.stackoverflow-sidebar', '.stackoverflow-footer',
            '.reddit-sidebar', '.reddit-footer', '.reddit-vote',
            // Navigation and pagination
            '.pagination', '.pager', '.page-nav', '.next-prev', '.post-navigation',
            '.tag-list', '.category-list', '.archive-list', '.recent-posts',
            // Form elements
            'form:not(.content form)', '.form', '.newsletter', '.subscription',
            '.contact-form', '.feedback-form', 'input', 'textarea', 'select', 'button:not(.content button)',
            // Copyright and legal
            '.copyright', '.legal', '.terms', '.privacy', '.disclaimer',
            '.license-info', '.attribution',
            // Loading indicators and placeholders
            '.loading', '.spinner', '.placeholder', '.skeleton', '.lazy-load',
            '.intersection-observer', '.lazyload',
            // Tracking and analytics
            '[id*="analytics"]', '[class*="analytics"]', '[id*="tracking"]', '[class*="tracking"]',
            '[id*="gtm"]', '[class*="gtm"]', '.google-analytics', '.ga-', '.fb-pixel'
        ];
        // Additional, more platform-specific exclusion rules.
        // (A former '@media print { display: none }' entry was dropped: it is CSS, not a
        // selector, and made querySelectorAll throw on every run.)
        const enhancedExcludeSelectors = [
            // Generic hidden-content patterns
            '[style*="display:none"]', '[style*="display: none"]', '[style*="visibility:hidden"]',
            '[class*="hidden"]', '[class*="invisible"]', '[id*="hidden"]',
            // More ads and tracking
            '[id*="sponsor"]', '[class*="sponsor"]', '[data-ad]', '[data-ads]',
            'div[id^="div-gpt-ad"]', '.gpt-ad', '.ad-slot', '.ad-container',
            // More social and sharing
            '.share-bar', '.sharing-tools', '.social-sharing', '.follow-us',
            '.subscribe-box', '.newsletter-box', '.email-signup',
            // More navigation and menus
            '.top-menu', '.bottom-menu', '.side-menu', '.mobile-menu',
            '.menu-toggle', '.hamburger', '.dropdown-menu',
            // Metadata and timestamps
            '.published-date', '.author-info', '.byline', '.meta-info',
            '.reading-time', '.word-count', '.view-count',
            // Specific content management systems
            '.wp-block-group', '.wp-block-columns', '.wp-block-cover',
            '.elementor-widget', '.vc_row', '.fusion-row',
            // Device-specific elements
            '.mobile-only', '.tablet-only', '.desktop-only',
            // Accessibility / screen-reader-only helpers
            '.screen-reader-text', '.assistive-text', '.skip-link'
        ];
        excludeSelectors.push(...enhancedExcludeSelectors);

        console.log("Removing excluded elements from clone...");
        let removedCount = 0;

        // Phase 1 list: unambiguous non-content, removed unconditionally. The substring
        // selectors [id*="ad-"] / [class*="ad-"] are intentionally NOT listed here: they
        // also match names such as "thread-body" or "download-notes", so they are handled
        // in phase 2 where text-rich matches are preserved.
        const criticalExcludes = [
            'script', 'style', 'noscript', 'template', 'meta', 'link[rel="stylesheet"]',
            '.ad', '.ads', '.advertisement',
            'header', 'footer', 'nav', '.header', '.footer', '.navbar'
        ];
        const criticalSet = new Set(criticalExcludes);

        // Phase 1: remove critical non-content elements.
        // (querySelectorAll only returns descendants, so the clone root is never matched.)
        criticalExcludes.forEach(selector => {
            try {
                clone.querySelectorAll(selector).forEach(el => {
                    el.remove();
                    removedCount++;
                });
            } catch (e) { console.warn(`Error removing critical elements for selector "${selector}": ${e.message}`); }
        });

        // Phase 2: remove the remaining exclusions, keeping matches that look like real content.
        const remainingExcludes = [...new Set(excludeSelectors)].filter(sel => !criticalSet.has(sel));
        for (const selector of remainingExcludes) {
            try {
                clone.querySelectorAll(selector).forEach(el => {
                    // A match with lots of text and several paragraphs is probably a false positive.
                    const textLength = el.textContent?.trim().length || 0;
                    const isLikelyContent = textLength > 200 && el.querySelectorAll('p').length > 2;
                    if (isLikelyContent) {
                        console.log(`Preserved element matching "${selector}" due to substantial content (${textLength} chars)`);
                        return;
                    }
                    el.remove();
                    removedCount++;
                });
            } catch (e) { console.warn(`Error removing elements for selector "${selector}": ${e.message}`); }
        }

        // Phase 3: remove empty leaf elements. Elements that are meaningful without text
        // or children (images, media, line breaks, table cells, ...) must survive,
        // otherwise they never reach the Markdown conversion rules.
        const meaningfulWhenEmpty = 'img, picture, source, video, audio, track, canvas, svg, iframe, embed, object, br, hr, td, th, col, colgroup';
        const structuralTags = ['div', 'span', 'section', 'article'];
        try {
            clone.querySelectorAll('*').forEach(el => {
                if (el.children.length > 0) return;
                if ((el.textContent?.trim() || '').length > 0) return;
                if (structuralTags.includes(el.tagName.toLowerCase())) return;
                if (el.matches(meaningfulWhenEmpty)) return;
                // Keep the internals of vector graphics and MathML intact.
                if (el.closest('svg, math')) return;
                el.remove();
                removedCount++;
            });
        } catch (e) { console.warn('Error during empty element cleanup:', e.message); }
        console.log(`Removed ${removedCount} elements/subtrees from clone.`);

        // --- Post-cleaning Check and Recovery ---
        const finalTextLength = clone.textContent?.trim().length || 0;
        const finalChildCount = clone.childElementCount || 0;
        if (finalTextLength < 50 || (finalChildCount === 0 && finalTextLength < 200)) {
            console.warn(`Clone seems empty after cleaning! (Text: ${finalTextLength}, Children: ${finalChildCount})`);
            console.log("Attempting content recovery...");
            // Recovery: clone again and clean with a far more conservative list.
            const recoveryClone = bestCandidate.cloneNode(true);
            const conservativeExcludes = [
                'script', 'style', 'noscript', 'template', 'meta', 'link',
                '.ad', '.ads', '.advertisement', 'iframe[src*="ads"]',
                'header:not(.content header)', 'footer:not(.content footer)', 'nav:not(.content nav)'
            ];
            conservativeExcludes.forEach(selector => {
                try {
                    recoveryClone.querySelectorAll(selector).forEach(el => el.remove());
                } catch (e) { console.warn(`Error removing recovery selector "${selector}": ${e.message}`); }
            });
            const recoveredTextLength = recoveryClone.textContent?.trim().length || 0;
            if (recoveredTextLength > finalTextLength * 2) {
                console.log(`Content recovery successful! Recovered ${recoveredTextLength} chars vs ${finalTextLength} chars.`);
                return { title: pageTitle, contentNode: recoveryClone };
            }
            console.warn("Content recovery failed. Proceeding with original cleaned content.");
        } else {
            console.log(`Content cleaning successful. Final content: ${finalTextLength} chars, ${finalChildCount} child elements.`);
        }
        return { title: pageTitle, contentNode: clone };
    } catch (error) {
        console.error("Critical error during cloning or cleaning:", error.message, error.stack);
        return null;
    }
}
/**
 * Decides whether two adjacent non-blank Markdown lines must be separated by
 * a blank line so that block constructs are recognised by renderers.
 * Consecutive list items and consecutive blockquote lines are left together.
 * @param {string} prev - The earlier line (non-blank).
 * @param {string} next - The following line (non-blank).
 * @returns {boolean} True when a blank line should be inserted between them.
 */
function needsBlankLineBetween(prev, next) {
    const listItem = /^[ \t]*(?:[*+-]|\d+[.)])[ \t]/;
    const heading = /^#{1,6}[ \t]/;
    if (heading.test(next)) return true;
    if (heading.test(prev)) return !/^[#\s]/.test(next);
    const prevIsItem = listItem.test(prev);
    // A list needs a blank line before its first item only; indented predecessors
    // are continuation text of an enclosing item.
    if (listItem.test(next)) return !prevIsItem && !/^[ \t]/.test(prev);
    if (prevIsItem) return !/^[ \t]/.test(next);
    if (next.startsWith('>')) return !prev.startsWith('>');
    return false;
}

/**
 * Cleans up a stretch of Markdown that contains no fenced code block.
 * @param {string} text - Markdown outside of fenced code.
 * @returns {string} The tidied text (edges are not trimmed).
 */
function tidyMarkdownProse(text) {
    // Only well-formed tags with a known HTML name are treated as leftover markup, so
    // comparisons ("a < b"), autolinks (<https://...>) and generics survive.
    const htmlTagNames = 'a|abbr|article|aside|audio|b|blockquote|body|br|button|center|code|dd|div|dl|dt|em|'
        + 'figcaption|figure|font|footer|form|h[1-6]|head|header|hr|html|i|iframe|img|input|label|li|main|nav|'
        + 'ol|p|picture|pre|s|script|section|small|source|span|strong|style|sub|sup|svg|table|tbody|td|tfoot|'
        + 'th|thead|tr|u|ul|video';
    // The first alternative consumes inline code spans so tags inside them are kept.
    const inlineCodeOrTag = new RegExp(
        '(`+)[^\\n]*?\\1|<\\/?(?:' + htmlTagNames + ')(?:\\s[^<>]*)?\\/?>',
        'gi'
    );
    let out = text.replace(inlineCodeOrTag, (match) => (match.startsWith('`') ? match : ''));

    // Trim padding inside [ ... ] and ( ... ) on a single line.
    out = out.replace(/\[[ \t]+([^\]\n]*?)[ \t]+\]/g, '[$1]');
    out = out.replace(/\([ \t]+([^)\n]*?)[ \t]+\)/g, '($1)');

    // Make sure headings, lists and blockquotes are set off by blank lines.
    const lines = out.split('\n');
    const spaced = [];
    lines.forEach((line, index) => {
        spaced.push(line);
        const next = lines[index + 1];
        if (next === undefined || line.trim() === '' || next.trim() === '') return;
        if (needsBlankLineBetween(line, next)) spaced.push('');
    });
    out = spaced.join('\n');

    // Collapse runs of blank lines last, so earlier steps cannot re-introduce them.
    out = out.replace(/\n\s*\n\s*\n/g, '\n\n');
    // Re-join a blockquote that was split after an empty quote line.
    out = out.replace(/^>[ \t]*\n\n(?=>)/gm, '>\n');
    return out;
}

/**
 * Post-processes Markdown produced by the converter: strips leftover HTML
 * tags, normalises spacing around headings, lists and blockquotes, collapses
 * runs of blank lines and surrounds fenced code blocks with blank lines.
 * The contents of fenced code blocks are passed through unchanged.
 * @param {string} markdown - Raw Markdown content.
 * @returns {string} Tidied Markdown ending in exactly one newline, or '' when
 *   the input is empty or not a string.
 */
function postProcessMarkdown(markdown) {
    console.log("Post-processing Markdown content...");
    if (!markdown || typeof markdown !== 'string') {
        console.warn("Invalid markdown content for post-processing");
        return '';
    }

    // Strip blank lines at both ends but keep the first line's indentation.
    const trimBlankEdges = (segment) => segment.replace(/^(?:[ \t]*\n)+/, '').replace(/\s+$/, '');

    // A fenced block: opening fence line, optional body, closing fence of the same length.
    const fencedBlock = /^[ \t]*(`{3,})[^\n`]*\n(?:[\s\S]*?\n)?[ \t]*\1[ \t]*$/gm;
    const pieces = [];
    let cursor = 0;
    for (const match of markdown.matchAll(fencedBlock)) {
        pieces.push(trimBlankEdges(tidyMarkdownProse(markdown.slice(cursor, match.index))));
        pieces.push(match[0]); // code is kept verbatim
        cursor = match.index + match[0].length;
    }
    pieces.push(trimBlankEdges(tidyMarkdownProse(markdown.slice(cursor))));

    // Joining with a blank line guarantees spacing around every code block.
    let processed = pieces.filter((piece) => piece !== '').join('\n\n').trim();
    if (processed) {
        processed += '\n';
    }
    console.log("Markdown post-processing completed");
    return processed;
}
// --- Main Conversion and Download Logic ---
/**
 * Resolves a possibly relative URL against the current page.
 * @param {string} rawUrl - URL as found in an attribute.
 * @returns {string} Absolute URL, '' for empty input, or the input unchanged
 *   when it cannot be parsed.
 */
function toAbsoluteUrl(rawUrl) {
    if (!rawUrl) return '';
    try {
        return new URL(rawUrl, window.location.href).href;
    } catch (urlError) {
        return rawUrl;
    }
}

/**
 * Creates a TurndownService configured with turndownOptions, the GFM plugin
 * (when loaded) and the custom rules for math, images, code blocks, tables,
 * blockquotes and embedded media.
 * @returns {object} The configured TurndownService instance.
 * @throws {Error} When the Turndown library has not been loaded.
 */
function createMarkdownConverter() {
    console.log("Initializing TurndownService...");
    if (typeof TurndownService === 'undefined') { throw new Error('TurndownService is not defined.'); }
    const turndownService = new TurndownService(turndownOptions);

    console.log("Applying GFM plugin...");
    if (typeof turndownPluginGfm !== 'undefined' && typeof turndownPluginGfm.gfm === 'function') {
        try {
            turndownService.use(turndownPluginGfm.gfm);
            console.log("GFM applied.");
        } catch (gfmError) { console.error("Error applying GFM plugin:", gfmError); }
    } else { console.warn("GFM plugin not loaded."); }

    // Math rule (KaTeX/MathJax): emit the original LaTeX wrapped in $...$ or $$...$$.
    const mathRule = {
        filter(node) {
            try {
                return (
                    (node.nodeName === 'SPAN' && (node.classList.contains('katex') || node.classList.contains('MathJax_Preview'))) ||
                    (node.nodeName === 'DIV' && node.classList.contains('katex-display')) ||
                    (node.nodeName === 'SCRIPT' && node.getAttribute('type')?.startsWith('math/tex')) ||
                    (node.getAttribute('role') === 'math')
                );
            } catch (filterError) { console.error("Error inside MathJax filter function:", filterError, "Node:", node); return false; }
        },
        replacement(content, node) {
            let latex = '';
            let delimiter = '$';
            try {
                if (node.nodeName === 'SCRIPT') {
                    latex = node.textContent || '';
                    if (node.getAttribute('type')?.includes('mode=display') || latex.trim().startsWith('\\display')) { delimiter = '$$'; }
                } else if (node.dataset && node.dataset.originalLatex) {
                    latex = node.dataset.originalLatex;
                    if (node.classList.contains('katex-display') || node.closest('.MathJax_Display')) { delimiter = '$$'; }
                } else if (node.getAttribute('aria-label')) {
                    latex = node.getAttribute('aria-label');
                    if (node.nodeName === 'DIV' || node.classList.contains('katex-display') || node.closest('.MathJax_Display')) { delimiter = '$$'; }
                } else if (node.classList.contains('katex') || node.classList.contains('katex-display')) {
                    // The filter also accepts a .katex-display wrapper, so read the annotation
                    // from it too instead of dropping the formula.
                    const annotation = node.querySelector('annotation[encoding="application/x-tex"]');
                    if (annotation) {
                        latex = annotation.textContent || '';
                        // closest() includes the node itself, so this covers both the wrapper
                        // and a .katex span nested inside one.
                        if (node.closest('.katex-display')) { delimiter = '$$'; }
                    }
                } else if (node.nodeName === 'MATH' && node.getAttribute('alttext')) {
                    latex = node.getAttribute('alttext');
                    if (node.getAttribute('display') === 'block') { delimiter = '$$'; }
                }
                if (latex) {
                    latex = latex.trim();
                    // Already delimited: pass through unchanged.
                    if ((latex.startsWith('$$') && latex.endsWith('$$')) || (latex.startsWith('$') && latex.endsWith('$') && !latex.startsWith('$$'))) { return latex; }
                    return `${delimiter}${latex}${delimiter}`;
                }
                return '';
            } catch (ruleError) { console.error("Error processing math rule replacement for node:", node, ruleError); return ''; }
        }
    };
    try {
        console.log("Adding Math rule...");
        turndownService.addRule('mathjaxKatex', mathRule);
        console.log("Math rule added.");
    } catch (addRuleError) { console.error("Failed to add Math rule:", addRuleError); }

    // Images: ![alt](absolute-url "title"); alt text only when there is no source.
    turndownService.addRule('images', {
        filter: 'img',
        replacement(content, node) {
            const alt = node.getAttribute('alt') || '';
            const src = node.getAttribute('src') || node.getAttribute('data-src') || '';
            const title = node.getAttribute('title') ? ` "${node.getAttribute('title')}"` : '';
            if (!src) return alt ? `[${alt}]` : '';
            return `![${alt}](${toAbsoluteUrl(src)}${title})`;
        }
    });

    // Code blocks: fenced, with the language taken from a language-xxx / lang-xxx class.
    turndownService.addRule('codeBlocks', {
        filter: ['pre'],
        replacement(content, node) {
            const codeElement = node.querySelector('code');
            const language = codeElement ?
                (codeElement.className.match(/language-(\w+)/) ||
                 codeElement.className.match(/lang-(\w+)/) ||
                 [])[1] || '' : '';
            return '\n\n```' + language + '\n' + content + '\n```\n\n';
        }
    });

    // Tables: plain-text cells; the first row that has cells is used as the header row.
    turndownService.addRule('tables', {
        filter: 'table',
        replacement(content, node) {
            const rows = Array.from(node.querySelectorAll('tr'));
            if (rows.length === 0) return content;
            let markdown = '\n\n';
            let headerWritten = false;
            rows.forEach((row) => {
                const cells = Array.from(row.querySelectorAll('td, th'));
                if (cells.length === 0) return;
                const cellContents = cells.map((cell) =>
                    cell.textContent.trim().replace(/\|/g, '\\|').replace(/\n/g, ' '));
                markdown += '| ' + cellContents.join(' | ') + ' |\n';
                // The separator must follow the first emitted row, even when earlier <tr>s were empty.
                if (!headerWritten) {
                    markdown += '|' + ' --- |'.repeat(cells.length) + '\n';
                    headerWritten = true;
                }
            });
            return markdown + '\n';
        }
    });

    // Blockquotes: prefix every line with "> " and append the <cite> text as attribution.
    turndownService.addRule('blockquotes', {
        filter: 'blockquote',
        replacement(content, node) {
            const cite = node.querySelector('cite');
            const attribution = cite ? `\n\n— ${cite.textContent.trim()}` : '';
            return '\n\n> ' + content.trim().replace(/\n/g, '\n> ') + attribution + '\n\n';
        }
    });

    // Videos and iframes: rendered as links, with a poster/thumbnail image when one is known.
    turndownService.addRule('videos', {
        filter: ['video', 'iframe'],
        replacement(content, node) {
            if (node.tagName === 'VIDEO') {
                const src = toAbsoluteUrl(node.getAttribute('src') || node.querySelector('source')?.getAttribute('src') || '');
                const poster = toAbsoluteUrl(node.getAttribute('poster') || '');
                const alt = node.getAttribute('alt') || 'Video';
                if (!src) return '';
                if (poster) {
                    return `[![${alt}](${poster})](${src})`;
                }
                return `[${alt}](${src})`;
            }
            if (node.tagName === 'IFRAME') {
                const src = toAbsoluteUrl(node.getAttribute('src') || '');
                const title = node.getAttribute('title') || 'Embedded content';
                if (!src) return '';
                // YouTube embeds get their thumbnail as a clickable image.
                if (src.includes('youtube.com') || src.includes('youtu.be')) {
                    const videoId = src.match(/(?:youtube\.com\/embed\/|youtu\.be\/)([^?&]+)/)?.[1];
                    if (videoId) {
                        return `\n\n[![${title}](https://img.youtube.com/vi/${videoId}/hqdefault.jpg)](${src})\n\n`;
                    }
                }
                return `\n\n[${title}](${src})\n\n`;
            }
            return content;
        }
    });

    return turndownService;
}

/**
 * Shows a transient "download succeeded" toast in the top-right corner.
 * @param {string} fileName - Name of the downloaded file (inserted as text, not HTML).
 */
function showDownloadSuccessToast(fileName) {
    const notification = document.createElement('div');
    notification.style.cssText = `
        position: fixed;
        top: 20px;
        right: 20px;
        background: linear-gradient(135deg, #4caf50, #45a049);
        color: white;
        padding: 16px 24px;
        border-radius: 8px;
        z-index: 10000;
        font-family: 'Segoe UI', Arial, sans-serif;
        font-size: 14px;
        box-shadow: 0 4px 12px rgba(76, 175, 80, 0.3);
        border-left: 4px solid #2e7d32;
        min-width: 300px;
        animation: slideIn 0.3s ease-out;
    `;
    const row = document.createElement('div');
    row.style.cssText = 'display: flex; align-items: center;';
    const icon = document.createElement('div');
    icon.style.cssText = 'font-size: 20px; margin-right: 10px;';
    icon.textContent = '✅';
    const textBox = document.createElement('div');
    const heading = document.createElement('div');
    heading.style.cssText = 'font-weight: bold; margin-bottom: 4px;';
    heading.textContent = '下载成功!';
    const nameLine = document.createElement('div');
    nameLine.style.cssText = 'font-size: 12px; opacity: 0.9;';
    nameLine.textContent = `文件名: ${fileName}`;
    textBox.append(heading, nameLine);
    row.append(icon, textBox);
    notification.appendChild(row);

    // Inject the slide animations once per page.
    if (!document.getElementById('download-notification-style')) {
        const style = document.createElement('style');
        style.id = 'download-notification-style';
        style.textContent = `
            @keyframes slideIn {
                from { transform: translateX(100%); opacity: 0; }
                to { transform: translateX(0); opacity: 1; }
            }
            @keyframes slideOut {
                from { transform: translateX(0); opacity: 1; }
                to { transform: translateX(100%); opacity: 0; }
            }
        `;
        document.head.appendChild(style);
    }
    document.body.appendChild(notification);

    // Slide out after 3 seconds, then remove.
    setTimeout(() => {
        notification.style.animation = 'slideOut 0.3s ease-in';
        setTimeout(() => notification.remove(), 300);
    }, 3000);
}

/**
 * Last-resort fallback: shows the Markdown in a modal dialog so the user can
 * copy it manually. Dynamic values are set via textContent/value and handlers
 * are attached with addEventListener, so page content cannot break the markup
 * and the dialog does not rely on inline event-handler attributes.
 * @param {string} content - Markdown text to present.
 * @param {string} fileName - Suggested file name.
 */
function showManualCopyDialog(content, fileName) {
    const idleBackground = 'linear-gradient(135deg, #2196f3, #1976d2)';
    const buttonStyle = `
        padding: 12px 24px;
        color: white;
        border: none;
        border-radius: 6px;
        cursor: pointer;
        font-size: 14px;
        font-weight: 500;
        transition: all 0.3s ease;
    `;
    const overlay = document.createElement('div');
    overlay.style.cssText = `
        position: fixed; top: 0; left: 0; width: 100%; height: 100%;
        background: rgba(0,0,0,0.85); z-index: 10000;
        display: flex; align-items: center; justify-content: center;
        font-family: 'Segoe UI', Arial, sans-serif;
    `;
    // Static markup only; nothing is interpolated into this template.
    overlay.innerHTML = `
        <div style="
            background: white; padding: 30px; border-radius: 12px;
            max-width: 85%; max-height: 85%; overflow: hidden;
            box-shadow: 0 20px 40px rgba(0,0,0,0.3);
            display: flex; flex-direction: column;
        ">
            <div style="margin-bottom: 20px;">
                <h2 style="color: #333; margin: 0 0 10px 0; display: flex; align-items: center;">
                    <span style="font-size: 24px; margin-right: 10px;">📋</span>
                    手动保存 Markdown 文件
                </h2>
                <p style="color: #666; margin: 0; line-height: 1.5;">
                    自动下载失败,请复制以下内容并手动保存为
                    <code data-role="file-name" style="background:#f0f0f0;padding:2px 6px;border-radius:3px;color:#e91e63;"></code>
                </p>
            </div>
            <div style="flex: 1; display: flex; flex-direction: column; min-height: 0;">
                <textarea readonly data-role="markdown" style="
                    width: 100%;
                    height: 400px;
                    font-family: 'Consolas', 'Monaco', monospace;
                    font-size: 13px;
                    border: 2px solid #e0e0e0;
                    border-radius: 6px;
                    padding: 15px;
                    resize: none;
                    outline: none;
                    background: #fafafa;
                    line-height: 1.4;
                    flex: 1;
                "></textarea>
            </div>
            <div style="margin-top: 20px; text-align: center;">
                <button type="button" data-role="copy" style="${buttonStyle} background: ${idleBackground}; margin-right: 10px;">📋 复制内容</button>
                <button type="button" data-role="close" style="${buttonStyle} background: linear-gradient(135deg, #757575, #616161);">❌ 关闭</button>
            </div>
            <div style="margin-top: 15px; padding: 10px; background: #f5f5f5; border-radius: 6px; font-size: 12px; color: #666; text-align: center;">
                💡 提示:复制后可以粘贴到任何文本编辑器中,然后保存为 .md 文件
            </div>
        </div>
    `;
    overlay.querySelector('[data-role="file-name"]').textContent = fileName;
    const textarea = overlay.querySelector('[data-role="markdown"]');
    textarea.value = content;
    const copyButton = overlay.querySelector('[data-role="copy"]');
    const closeButton = overlay.querySelector('[data-role="close"]');

    const flashCopied = () => {
        copyButton.textContent = '✅ 已复制到剪贴板!';
        copyButton.style.background = 'linear-gradient(135deg, #4caf50, #45a049)';
        setTimeout(() => {
            copyButton.textContent = '📋 复制内容';
            copyButton.style.background = idleBackground;
        }, 2000);
    };
    const reportCopyFailure = () => alert('复制失败,请手动选择文本内容进行复制');

    copyButton.addEventListener('click', () => {
        textarea.select();
        textarea.setSelectionRange(0, textarea.value.length);
        let copied = false;
        try {
            copied = document.execCommand('copy');
        } catch (copyError) {
            copied = false;
        }
        if (copied) {
            flashCopied();
            return;
        }
        // Fall back to the asynchronous Clipboard API.
        if (navigator.clipboard?.writeText) {
            navigator.clipboard.writeText(textarea.value).then(flashCopied).catch(reportCopyFailure);
        } else {
            reportCopyFailure();
        }
    });
    closeButton.addEventListener('click', () => overlay.remove());
    [copyButton, closeButton].forEach((button) => {
        button.addEventListener('mouseover', () => { button.style.transform = 'translateY(-1px)'; });
        button.addEventListener('mouseout', () => { button.style.transform = 'translateY(0)'; });
    });
    document.body.appendChild(overlay);
}

/**
 * Downloads the Markdown through a temporary Blob URL and anchor element;
 * shows the manual-copy dialog when that fails.
 * @param {string} content - Markdown text.
 * @param {string} fileName - Target file name.
 */
function saveViaBrowserDownload(content, fileName) {
    console.log("Using fallback download method...");
    try {
        const blob = new Blob([content], { type: 'text/markdown;charset=utf-8' });
        const url = URL.createObjectURL(blob);
        const downloadLink = document.createElement('a');
        downloadLink.href = url;
        downloadLink.download = fileName;
        downloadLink.style.display = 'none';
        try {
            document.body.appendChild(downloadLink);
            downloadLink.click();
        } finally {
            downloadLink.remove();
            // Release the object URL shortly after the click has been dispatched.
            setTimeout(() => { URL.revokeObjectURL(url); }, 100);
        }
        console.log("Fallback download initiated successfully");
    } catch (fallbackError) {
        console.error("Fallback download also failed:", fallbackError);
        showManualCopyDialog(content, fileName);
        return;
    }
    try {
        showDownloadSuccessToast(fileName);
    } catch (toastError) {
        // The download itself succeeded; a missing toast is only cosmetic.
        console.warn("Could not show download notification:", toastError);
    }
}

/**
 * Saves the Markdown with GM_download when available, otherwise (or when
 * GM_download reports an error) through the browser download fallback.
 * @param {string} content - Markdown text.
 * @param {string} fileName - Target file name.
 */
function saveMarkdownFile(content, fileName) {
    console.log(`Attempting to download ${fileName}...`);
    if (typeof GM_download !== 'function') {
        console.warn("GM_download not available, using fallback method");
        saveViaBrowserDownload(content, fileName);
        return;
    }
    const dataUri = `data:text/markdown;charset=utf-8,${encodeURIComponent(content)}`;
    GM_download({
        url: dataUri,
        name: fileName,
        saveAs: true,
        onload: () => {
            console.log("Download completed successfully via GM_download");
        },
        onerror: (err) => {
            console.warn('GM_download failed:', err);
            if (err.error !== 'not_whitelisted') {
                console.error("GM_download error:", err.error || 'Unknown error');
                console.log("Using fallback download due to GM_download error");
                saveViaBrowserDownload(content, fileName);
                return;
            }
            console.log("File extension not whitelisted. Attempting fallback download method...");
            const userChoice = confirm(
                "🚫 Tampermonkey下载被阻止\n\n" +
                "原因:.md文件扩展名未在Tampermonkey白名单中\n\n" +
                "解决方案:\n" +
                "✅ 点击'确定' - 使用浏览器下载(推荐,无需设置)\n" +
                "⚙️ 点击'取消' - 查看详细设置指南\n\n" +
                "注意:浏览器下载功能完全正常,您可以放心使用!"
            );
            if (userChoice) {
                console.log("User chose browser download method");
                saveViaBrowserDownload(content, fileName);
            } else {
                console.log("User wants to see setup guide");
                // Point the user at the settings guide entry in the script menu.
                setTimeout(() => {
                    alert("请在Tampermonkey菜单中点击 '📥 Download Settings Guide' 查看详细设置说明");
                }, 100);
            }
        }
    });
}

/**
 * Menu-command entry point: converts the current page's main content to
 * Markdown, post-processes it, and offers it as a download named after the
 * page title. Failures are logged and reported to the user via alert().
 */
function convertAndDownload() {
    console.log("Enhanced Convert to Markdown (v2.0): Button clicked...");
    try {
        const turndownService = createMarkdownConverter();

        // --- Perform Conversion ---
        console.log("Getting page content node...");
        const pageData = getPageContentNode();
        if (!pageData || !pageData.contentNode) {
            console.error("Failed to get valid page content node. Aborting.");
            alert("Could not get a valid page content node for conversion.");
            return;
        }
        console.log(`Content node retrieved. Title: ${pageData.title}. Starting conversion...`);
        let markdownContent = '';
        try {
            markdownContent = turndownService.turndown(pageData.contentNode);
            console.log("Markdown conversion complete. Applying post-processing...");
            markdownContent = postProcessMarkdown(markdownContent);
            console.log("Final markdown processing completed.");
        } catch (convertError) {
            console.error("Error during Turndown conversion:", convertError.message, convertError.stack);
            alert(`Error during Markdown conversion: ${convertError.message}`);
            return;
        }

        // --- Content quality check ---
        if (!markdownContent || markdownContent.trim() === '') {
            console.warn("Conversion resulted in empty Markdown content.");
            alert("Warning: The converted Markdown content is empty. This might indicate that the page structure is not supported or contains mostly non-text content.");
            return;
        }
        const finalLength = markdownContent.trim().length;
        const lineCount = markdownContent.split('\n').length;
        const wordCount = markdownContent.split(/\s+/).length;
        console.log(`Conversion quality metrics: ${finalLength} chars, ${lineCount} lines, ~${wordCount} words`);
        if (finalLength < 100) {
            const proceed = confirm(`Warning: The converted content is very short (${finalLength} characters). This might indicate incomplete conversion. Do you want to proceed with the download?`);
            if (!proceed) {
                console.log("User chose to cancel due to short content length.");
                return;
            }
        }

        // --- Prepare Filename & Download ---
        const filename = sanitizeFilename(pageData.title) + ".md";
        saveMarkdownFile(markdownContent, filename);
    } catch (error) {
        console.error("Critical error during convertAndDownload:", error.message, error.stack);
        alert(`A critical error occurred while running the script: ${error.message}`);
    }
}
// --- Register Menu Command ---
// Registers the two Tampermonkey menu entries offered by this script:
//   1. the page-to-Markdown conversion (access key "m"), and
//   2. a modal guide explaining how to whitelist ".md" downloads (access key "h").
// If the GM API is missing, or registration throws, the failure is logged and
// reported to the user through alert().
if (typeof GM_registerMenuCommand === 'function') {
    try {
        // Primary action: convert the current page and download the result.
        GM_registerMenuCommand("🔄 Convert Page to Markdown (v2.0 Enhanced)", convertAndDownload, "m");

        // Wrapper element of the guide that is currently displayed, or null.
        // Tracked so that repeated menu clicks do not stack several overlays.
        let activeGuide = null;

        // Unified download-settings guide, exposed directly in the main menu.
        GM_registerMenuCommand("📥 Download Settings Guide", function() {
            // A guide is already on screen: do not open a second one.
            if (activeGuide && document.contains(activeGuide)) {
                return;
            }

            // Static markup only (no interpolation). The close button carries a
            // data attribute instead of inline on* handlers: inline handlers are
            // refused on pages whose Content-Security-Policy lacks
            // 'unsafe-inline', which would leave the overlay impossible to close.
            // The markup lines are kept unindented so the string content is unchanged.
            const unifiedHelpContent = `
<div style="
position: fixed; top: 0; left: 0; width: 100%; height: 100%;
background: rgba(0,0,0,0.9); z-index: 10000;
display: flex; align-items: center; justify-content: center;
font-family: 'Segoe UI', Arial, sans-serif;
">
<div style="
background: white; padding: 30px; border-radius: 12px;
max-width: 720px; max-height: 90%; overflow: auto;
box-shadow: 0 20px 40px rgba(0,0,0,0.3);
">
<h2 style="color: #333; margin-top: 0; text-align: center; display: flex; align-items: center; justify-content: center;">
<span style="font-size: 28px; margin-right: 10px;">📥</span>
下载设置完整指南
</h2>
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 18px; border-radius: 10px; margin: 20px 0; text-align: center;">
<strong style="font-size: 16px;">🎯 目标:允许Tampermonkey下载.md文件</strong><br>
<div style="margin-top: 8px; font-size: 14px; opacity: 0.9;">
如果设置失败,脚本会自动使用浏览器下载,无需担心!
</div>
</div>
<h3 style="color: #555; border-bottom: 2px solid #e0e0e0; padding-bottom: 8px;">🔧 详细设置步骤</h3>
<div style="background: #f8f9fa; padding: 25px; border-radius: 10px; line-height: 1.7; margin: 15px 0;">
<div style="margin-bottom: 20px;">
<strong style="color: #2e7d32; font-size: 15px;">1️⃣ 打开Tampermonkey管理界面</strong>
<ul style="margin: 8px 0; padding-left: 25px; color: #555;">
<li>点击浏览器工具栏的Tampermonkey图标 🐒</li>
<li>选择 "管理面板" 或 "Dashboard"</li>
</ul>
</div>
<div style="margin-bottom: 20px;">
<strong style="color: #2e7d32; font-size: 15px;">2️⃣ 进入设置页面</strong>
<ul style="margin: 8px 0; padding-left: 25px; color: #555;">
<li>点击页面顶部的 "设置" 或 "Settings" 标签</li>
<li>向下滚动找到 "Advanced" 部分</li>
</ul>
</div>
<div style="margin-bottom: 20px;">
<strong style="color: #2e7d32; font-size: 15px;">3️⃣ 配置下载白名单</strong>
<ul style="margin: 8px 0; padding-left: 25px; color: #555;">
<li>找到 "Downloads BETA" 选项</li>
<li>在 "Whitelist" 输入框中添加:</li>
</ul>
<div style="text-align: center; margin: 10px 0;">
<code style="background: linear-gradient(135deg, #ff9a9e 0%, #fecfef 100%); color: #333; padding: 10px 20px; border-radius: 6px; font-size: 16px; font-weight: bold; display: inline-block;">*.md</code>
</div>
</div>
<div>
<strong style="color: #2e7d32; font-size: 15px;">4️⃣ 保存设置</strong>
<ul style="margin: 8px 0; padding-left: 25px; color: #555;">
<li>滚动到页面底部点击 "Save" 按钮</li>
<li>刷新当前页面以使设置生效</li>
</ul>
</div>
</div>
<div style="background: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; border-radius: 8px; margin: 20px 0;">
<strong style="color: #8a6d00;">💡 浏览器特殊说明</strong><br>
<div style="margin-top: 8px; color: #6c5500; line-height: 1.5;">
<strong>Edge浏览器用户:</strong> 如果无法直接访问设置,可以:<br>
• 右键点击Tampermonkey图标 → 选择 "扩展选项"<br>
• 或在地址栏输入:<code style="background: rgba(0,0,0,0.1); padding: 2px 6px; border-radius: 3px;">edge://extensions/</code>
</div>
</div>
<div style="background: #e8f5e8; border-left: 4px solid #4caf50; padding: 15px; border-radius: 8px; margin: 20px 0;">
<strong style="color: #2e7d32;">✅ 备用下载方案</strong><br>
<div style="margin-top: 8px; color: #2e7d32; line-height: 1.5;">
即使无法设置Tampermonkey权限,脚本也会自动使用浏览器原生下载功能,<br>
保证您能够成功获取转换后的Markdown文件!
</div>
</div>
<div style="background: #f3e5f5; border-left: 4px solid #9c27b0; padding: 15px; border-radius: 8px; margin: 20px 0;">
<strong style="color: #6a1b9a;">🔧 常见问题解答</strong><br>
<div style="margin-top: 8px; color: #6a1b9a; line-height: 1.5;">
<strong>Q:</strong> 下载被浏览器阻止?<br>
<strong>A:</strong> 检查浏览器弹窗拦截设置,允许当前网站的下载<br><br>
<strong>Q:</strong> 文件名显示乱码?<br>
<strong>A:</strong> 使用支持UTF-8编码的文本编辑器打开文件<br><br>
<strong>Q:</strong> 转换内容不完整?<br>
<strong>A:</strong> 刷新页面后重新尝试转换
</div>
</div>
<div style="text-align: center; margin-top: 30px;">
<button data-wtm-role="close-guide"
style="
padding: 15px 35px;
background: linear-gradient(135deg, #4caf50, #45a049);
color: white;
border: none;
border-radius: 8px;
cursor: pointer;
font-size: 16px;
font-weight: bold;
box-shadow: 0 4px 15px rgba(76, 175, 80, 0.3);
transition: all 0.3s ease;
">
✓ 我已了解,关闭指南
</button>
</div>
</div>
</div>
`;
            const unifiedHelpDiv = document.createElement('div');
            unifiedHelpDiv.innerHTML = unifiedHelpContent;

            const closeButton = unifiedHelpDiv.querySelector('[data-wtm-role="close-guide"]');

            // Remove the wrapper itself (not only the overlay inside it) so that
            // no empty <div> is left behind in <body> after closing.
            closeButton.addEventListener('click', function() {
                unifiedHelpDiv.remove();
                activeGuide = null;
            });

            // Hover feedback, same values as the former inline handlers.
            closeButton.addEventListener('mouseover', function() {
                closeButton.style.transform = 'translateY(-2px)';
                closeButton.style.boxShadow = '0 6px 20px rgba(76, 175, 80, 0.4)';
            });
            closeButton.addEventListener('mouseout', function() {
                closeButton.style.transform = 'translateY(0)';
                closeButton.style.boxShadow = '0 4px 15px rgba(76, 175, 80, 0.3)';
            });

            document.body.appendChild(unifiedHelpDiv);
            activeGuide = unifiedHelpDiv;
        }, "h");
        console.log("Menu commands registered.");
    } catch (registerError) {
        console.error("Failed to register menu command:", registerError);
        alert("Failed to register menu command!");
    }
} else {
    console.error("GM_registerMenuCommand is not available.");
    alert("GM_registerMenuCommand is not available!");
}
console.log("Enhanced Webpage to Markdown (v2.0) script finished loading."); // Version updated
})();