MyDealz Scraper | Full Thread Exporter

Scrapes all comments from MyDealz threads via GraphQL API and exports to clean TXT format

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         MyDealz Scraper | Full Thread Exporter
// @namespace    Violentmonkey
// @version      2.5
// @description  Scrapes all comments from MyDealz threads via GraphQL API and exports to clean TXT format
// @author       Piknockyou/Antigravity
// @match        https://www.mydealz.de/deals/*
// @match        https://www.mydealz.de/diskussion/*
// @match        https://www.mydealz.de/feedback/*
// @match        https://www.mydealz.de/gutscheine/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=mydealz.de
// @grant        none
// @run-at       document-idle
// ==/UserScript==

(function () {
    'use strict';

    /**
     * Mutable state shared by the scraper functions in this script.
     * The binding itself is never reassigned; only its fields are updated.
     */
    const state = {
        isScraping: false,      // true while a scrape started by toggleScraping is active
        abortController: null,  // AbortController of the active run; null when idle
        threadId: null,         // thread identifier resolved by getThreadId()
        xsrfToken: null,        // decoded value of the `xsrf_t` cookie, sent as x-xsrf-token
        threadTitle: '',        // page headline, used for the export file name
        allComments: [],        // collected root comments, each carrying a `replies` array
        totalPages: 0,          // `pagination.last` reported for the root comment list
        totalRoot: 0,           // number of root comments collected so far
        totalReplies: 0         // number of replies collected so far
    };

    /**
     * Look up a cookie by name in `document.cookie`.
     *
     * @param {string} name - Cookie name to search for.
     * @returns {string|undefined} The undecoded cookie value, or `undefined`
     *   when the name does not occur exactly once.
     */
    function getCookie(name) {
        // The leading "; " lets the very first cookie match the same delimiter.
        const segments = `; ${document.cookie}`.split(`; ${name}=`);
        if (segments.length !== 2) {
            return undefined;
        }
        const [rawValue] = segments[1].split(';');
        return rawValue;
    }

    /**
     * Resolve the ID of the thread shown on the current page.
     *
     * Prefers the `data-thread-id` attribute of the first element carrying
     * one; otherwise falls back to the trailing `-<digits>` of the URL path.
     *
     * @returns {string|null} The thread ID, or `null` if neither source has it.
     */
    function getThreadId() {
        const holder = document.querySelector('[data-thread-id]');
        if (holder) {
            return holder.dataset.threadId;
        }

        const found = /-(\d+)(?:\?|$)/.exec(window.location.pathname);
        return found === null ? null : found[1];
    }

    /**
     * Turn an arbitrary title into a string usable as a file name.
     *
     * Replaces the characters < > : " / \ | ? * with underscores, collapses
     * whitespace runs to a single space, trims, and caps the result at 200
     * characters. A falsy name falls back to 'mydealz_export'.
     *
     * @param {string} name - Desired file name (without extension).
     * @returns {string} The sanitized name.
     */
    function sanitizeFilename(name) {
        const base = name || 'mydealz_export';
        const withoutReserved = base.replace(/[<>:"/\\|?*]/g, '_');
        const singleSpaced = withoutReserved.replace(/\s+/g, ' ');
        return singleSpaced.trim().substring(0, 200);
    }

    /**
     * Promise-based delay.
     *
     * @param {number} ms - Delay in milliseconds, passed to `setTimeout`.
     * @returns {Promise<void>} Resolves once the timer fires.
     */
    function sleep(ms) {
        return new Promise((done) => {
            setTimeout(done, ms);
        });
    }

    /**
     * Convert an HTML fragment into plain text suitable for the TXT export.
     *
     * `<br>` becomes a newline, every `<p>` is followed by a blank line, and
     * `i.emoji` elements are replaced by their `title` text.
     *
     * The markup is parsed with DOMParser rather than assigned to `innerHTML`
     * of an element owned by the live page: the parsed document is inert, so
     * embedded images are not fetched and inline event handlers do not run
     * while comment HTML is being converted.
     *
     * @param {string} html - HTML fragment; null/undefined are treated as ''.
     * @returns {string} Trimmed text content.
     */
    function cleanHtml(html) {
        const parsed = new DOMParser().parseFromString(String(html ?? ''), 'text/html');
        const root = parsed.body;

        // Convert br tags to newlines
        root.querySelectorAll('br').forEach(br => br.replaceWith('\n'));

        // Separate paragraphs with a blank line
        root.querySelectorAll('p').forEach(p => {
            p.insertAdjacentText('afterend', '\n\n');
        });

        // Replace emoji icons with their textual title
        root.querySelectorAll('i.emoji').forEach(emoji => {
            emoji.replaceWith(emoji.getAttribute('title') || '');
        });

        return root.textContent.trim();
    }

    /**
     * Format a timestamp given in seconds as a German-locale date/time string.
     *
     * @param {number} timestamp - Seconds; multiplied by 1000 for `Date`.
     * @returns {string} Result of `toLocaleString('de-DE')`.
     */
    function formatDate(timestamp) {
        const millis = timestamp * 1000;
        const moment = new Date(millis);
        return moment.toLocaleString('de-DE');
    }

    /**
     * Render reaction counters as a single line such as "👍 3  |  😂 1".
     *
     * @param {Array<{type: string, count: number}>} [reactionCounts]
     * @returns {string} The formatted line, or '' when there is nothing to show.
     *   Types other than LIKE/FUNNY/HELPFUL are shown with '❓'.
     */
    function formatReactions(reactionCounts) {
        const hasReactions = Boolean(reactionCounts) && reactionCounts.length !== 0;
        if (!hasReactions) {
            return '';
        }

        const icons = { 'LIKE': '👍', 'FUNNY': '😂', 'HELPFUL': '💡' };
        const describe = ({ type, count }) => `${icons[type] || '❓'} ${count}`;
        return reactionCounts.map(describe).join('  |  ');
    }

    /**
     * Collect thread-level information from the rendered page.
     *
     * @returns {{title: string, merchant: string, postedDate: string,
     *   temperature: string, author: string, description: string,
     *   additionalInfo: Array<{author: string, date: (string|undefined), content: string}>}}
     *   Fields that cannot be found are left as '' (or an empty array).
     */
    function getThreadMetadata() {
        // Trimmed text of the first element matching `selector`, '' if none.
        const textOf = (selector) => document.querySelector(selector)?.textContent.trim() || '';

        // Posted date: prefer the title attribute, fall back to the visible text.
        const dateEl = document.querySelector('.threadItem-content time[title]');
        const postedDate = dateEl?.getAttribute('title') || dateEl?.textContent.trim() || '';

        // Author: the second selector covers discussions.
        const authorEl = document.querySelector('.threadItemCard-author .thread-user')
            || document.querySelector('.short-profile-target .thread-user');

        // Description: first selector works for deals/vouchers, second for discussions.
        const descEl = document.querySelector('div[data-t="description"]')
            || document.querySelector('#threadDescriptionItemPortal .userHtml-content');
        let description = '';
        if (descEl) {
            // Work on a copy so stripping images does not touch the page.
            const copy = descEl.cloneNode(true);
            for (const img of copy.querySelectorAll('img')) {
                img.remove();
            }
            description = cleanHtml(copy.innerHTML);
        }

        // Additional info (user tips); entries lacking an author or body are skipped.
        const additionalInfo = [];
        for (const item of document.querySelectorAll('#additionalInfoPortal .threadInfo-item')) {
            const author = item.querySelector('.user button')?.textContent.trim();
            const body = item.querySelector('.comment-body .userHtml-content');
            if (!author || !body) {
                continue;
            }
            additionalInfo.push({
                author,
                date: item.querySelector('time')?.textContent.trim(),
                content: cleanHtml(body.innerHTML)
            });
        }

        return {
            title: textOf('h1.thread-title'),
            merchant: textOf('.threadItem-content a[data-t="merchantLink"]'),
            postedDate,
            temperature: textOf('.vote-temp'),
            author: authorEl?.textContent.trim() || '',
            description,
            additionalInfo
        };
    }

    /**
     * Fetch one page (up to 100 items) of top-level comments of the current
     * thread from the GraphQL endpoint, in ascending order.
     *
     * Reads `state.threadId` and `state.xsrfToken`.
     *
     * @param {number} page - 1-based page number.
     * @returns {Promise<{items: Array<object>, pagination: {current: number, last: number}}|null>}
     *   The `comments` payload, or `null` when the request was answered with a
     *   non-2xx status, GraphQL errors, or no `comments` data. Network failures
     *   still reject.
     */
    async function fetchRootComments(page) {
        const query = `
        query comments($filter: CommentFilter!, $limit: Int, $page: Int) {
          comments(filter: $filter, limit: $limit, page: $page) {
            items {
              commentId
              user { username }
              preparedHtmlContent
              reactionCounts { type count }
              createdAtTs
              wasEdited
              isPinned
              replyCount
            }
            pagination { current last }
          }
        }`;

        const response = await fetch("https://www.mydealz.de/graphql", {
            method: 'POST',
            headers: {
                'content-type': 'application/json',
                'x-pepper-txn': 'threads.show.deal',
                'x-request-type': 'application/vnd.pepper.v1+json',
                'x-requested-with': 'XMLHttpRequest',
                'x-xsrf-token': state.xsrfToken
            },
            body: JSON.stringify({
                query,
                variables: {
                    filter: { threadId: { eq: state.threadId }, order: { direction: "Ascending" } },
                    page,
                    limit: 100
                }
            })
        });

        // A non-2xx answer may not carry a JSON body at all; report it through
        // the existing `null` contract instead of letting response.json() throw
        // and abort the whole scrape.
        if (!response.ok) {
            console.warn(`MyDealz Scraper: root comments page ${page} failed with HTTP ${response.status}`);
            return null;
        }

        const data = await response.json();
        if (data.errors) {
            return null;
        }
        return data.data?.comments ?? null;
    }

    /**
     * Fetch all replies nested under one top-level comment, in ascending order.
     *
     * Replies are requested 100 per page and every page reported by
     * `pagination.last` is followed, so comments with more than 100 replies
     * are no longer cut off after the first page.
     * NOTE(review): this assumes the `pagination` field is served for
     * reply queries just as it is for the root comment query - confirm.
     *
     * Reads `state.threadId` and `state.xsrfToken`.
     *
     * @param {number|string} mainCommentId - ID of the parent (root) comment.
     * @returns {Promise<Array<object>>} The replies collected so far; empty when
     *   the first page fails. A failing later page ends the loop and returns
     *   what was already gathered. Network failures still reject.
     */
    async function fetchNestedReplies(mainCommentId) {
        const query = `
        query comments($filter: CommentFilter!, $limit: Int, $page: Int) {
          comments(filter: $filter, limit: $limit, page: $page) {
            items {
              commentId
              user { username }
              preparedHtmlContent
              reactionCounts { type count }
              createdAtTs
              wasEdited
            }
            pagination { current last }
          }
        }`;

        const replies = [];
        let lastPage = 1;

        for (let page = 1; page <= lastPage; page++) {
            const response = await fetch("https://www.mydealz.de/graphql", {
                method: 'POST',
                headers: {
                    'content-type': 'application/json',
                    'x-pepper-txn': 'threads.show.deal',
                    'x-request-type': 'application/vnd.pepper.v1+json',
                    'x-requested-with': 'XMLHttpRequest',
                    'x-xsrf-token': state.xsrfToken
                },
                body: JSON.stringify({
                    query,
                    variables: {
                        filter: {
                            mainCommentId,
                            threadId: { eq: state.threadId },
                            order: { direction: "Ascending" }
                        },
                        page,
                        limit: 100
                    }
                })
            });

            // A non-2xx answer may not be JSON; stop here rather than throwing.
            if (!response.ok) {
                console.warn(`MyDealz Scraper: replies of comment ${mainCommentId} (page ${page}) failed with HTTP ${response.status}`);
                break;
            }

            const data = await response.json();
            const result = data.errors ? null : data.data?.comments;
            if (!result) {
                break;
            }

            replies.push(...(result.items ?? []));
            // Without pagination info the loop ends after the current page.
            lastPage = result.pagination?.last ?? lastPage;
        }

        return replies;
    }

    /**
     * Click handler of the floating button: starts a scrape when idle, or
     * requests cancellation of the scrape that is currently running.
     *
     * A run resolves the thread ID and XSRF token, pre-scans all root comment
     * pages to obtain totals for the progress display, then walks the pages,
     * fetching the replies of every comment that has any, and finally calls
     * exportTxt() if at least one comment was collected (also after a stop).
     *
     * Errors are logged, shown via alert(), and the UI is always reset.
     *
     * @returns {Promise<void>}
     */
    async function toggleScraping() {
        const btn = document.getElementById('md-scraper-btn');

        if (state.isScraping) {
            // STOP SCRAPING
            // isScraping deliberately stays true until the running invocation
            // reaches its finally block (resetUI). Clearing it here would let a
            // further click start a second, overlapping run that resets
            // state.allComments underneath the one still winding down.
            state.abortController?.abort();
            if (btn) btn.textContent = 'Stopping...';
            return;
        }

        // START SCRAPING
        state.isScraping = true;
        state.abortController = new AbortController();
        const { signal } = state.abortController;

        if (btn) {
            btn.textContent = 'Stop Scraping';
            btn.style.background = '#dc3545'; // Red color for stop
        }

        const sumReplyCounts = (items) => items.reduce((sum, c) => sum + (c.replyCount || 0), 0);

        // Everything from here on runs inside try/finally so that any failure
        // during setup still resets the button instead of leaving it stuck.
        try {
            state.threadId = getThreadId();
            if (!state.threadId) throw new Error('Could not determine Thread ID.');

            // A missing cookie must not be sent as the literal text "undefined".
            const rawToken = getCookie('xsrf_t');
            state.xsrfToken = rawToken ? decodeURIComponent(rawToken) : '';
            state.threadTitle = document.querySelector('h1.thread-title')?.textContent.trim() || 'MyDealz Thread';
            state.allComments = [];
            state.totalRoot = 0;
            state.totalReplies = 0;

            updateProgress('Scanning...', 0, 0, 0, 0);

            const firstPage = await fetchRootComments(1);
            if (!firstPage) throw new Error('Failed to fetch comments');

            state.totalPages = firstPage.pagination.last;

            // Pages fetched during the pre-scan are kept and reused below, so
            // each root page is requested once instead of twice.
            const scannedPages = new Map([[1, firstPage]]);
            let totalComments = firstPage.items.length;
            let expectedReplies = sumReplyCounts(firstPage.items);

            // Pre-scan for totals
            for (let p = 2; p <= state.totalPages; p++) {
                if (signal.aborted) break;
                const pageData = await fetchRootComments(p);
                if (pageData) {
                    scannedPages.set(p, pageData);
                    totalComments += pageData.items.length;
                    expectedReplies += sumReplyCounts(pageData.items);
                }
            }

            updateProgress('Scraping...', 0, totalComments, 0, expectedReplies);
            await sleep(500);

            let processedComments = 0;
            let processedReplies = 0;

            for (let page = 1; page <= state.totalPages; page++) {
                if (signal.aborted) break;

                let pageData = scannedPages.get(page);
                if (!pageData) {
                    // The pre-scan could not load this page: try once more.
                    pageData = await fetchRootComments(page);
                    await sleep(200);
                    if (!pageData) continue;
                    totalComments += pageData.items.length;
                    expectedReplies += sumReplyCounts(pageData.items);
                }

                for (const comment of pageData.items) {
                    if (signal.aborted) break;

                    processedComments++;
                    updateProgress('Scraping...', processedComments, totalComments, processedReplies, expectedReplies);

                    const commentData = { ...comment, replies: [] };

                    if (comment.replyCount > 0) {
                        const replies = await fetchNestedReplies(comment.commentId);
                        commentData.replies = replies;
                        processedReplies += replies.length;
                        state.totalReplies += replies.length;
                        updateProgress('Scraping...', processedComments, totalComments, processedReplies, expectedReplies);
                        await sleep(100);
                    }

                    state.allComments.push(commentData);
                    state.totalRoot++;
                }
            }

            // Auto-download if we have data
            if (state.allComments.length > 0) {
                updateProgress(signal.aborted ? 'Stopped & Downloading...' : 'Complete! Downloading...', processedComments, totalComments, processedReplies, expectedReplies);
                await sleep(1000); // Give user a moment to see the status
                exportTxt();
            } else {
                updateProgress('No data found.', 0, 0, 0, 0);
            }

        } catch (err) {
            console.error(err);
            alert(`Error: ${err.message}`);
        } finally {
            resetUI();
        }
    }

    /**
     * Return the scraper to its idle state: clears the running flag and the
     * abort controller, and restores the button label and colour if the
     * button is still present in the page.
     */
    function resetUI() {
        Object.assign(state, { isScraping: false, abortController: null });

        const button = document.getElementById('md-scraper-btn');
        if (!button) {
            return;
        }
        button.textContent = 'Start Scraping';
        button.style.background = '#03a5c1';
    }

    /**
     * Build the plain-text export (thread header, description, user tips and
     * every collected comment with its replies) and trigger a download named
     * after the thread title.
     *
     * Reads `state.allComments`, `state.totalRoot`, `state.totalReplies` and
     * `state.threadTitle`; thread details come from getThreadMetadata().
     */
    function exportTxt() {
        const meta = getThreadMetadata();

        // NOTE(review): assumes an entry may arrive without a `user` object
        // (e.g. a removed account) - without this guard one such entry would
        // throw and discard the whole export.
        const nameOf = (entry) => entry.user?.username ?? 'deleted user';

        let text = `${meta.title}\n${'='.repeat(meta.title.length)}\n\n`;

        // Deal Info
        if (meta.merchant) text += `Merchant: ${meta.merchant}\n`;
        if (meta.postedDate) text += `Posted: ${meta.postedDate}\n`;
        if (meta.temperature) text += `Temperature: ${meta.temperature}\n`;
        if (meta.author) text += `Author: ${meta.author}\n`;
        text += `\n`;

        // Description
        if (meta.description) {
            text += `DESCRIPTION:\n${'-'.repeat(60)}\n${meta.description}\n\n`;
        }

        // Additional Info (user tips)
        if (meta.additionalInfo.length > 0) {
            text += `ADDITIONAL TIPS:\n${'-'.repeat(60)}\n`;
            meta.additionalInfo.forEach(tip => {
                // The date is optional; omit it rather than printing "undefined".
                const dateSuffix = tip.date ? ` - ${tip.date}` : '';
                text += `[${tip.author}]${dateSuffix}\n${tip.content}\n\n`;
            });
            text += '\n';
        }

        // Comments header
        text += `COMMENTS:\n${'-'.repeat(60)}\n`;
        text += `Total: ${state.totalRoot} comments + ${state.totalReplies} replies = ${state.totalRoot + state.totalReplies} items\n\n`;

        for (const comment of state.allComments) {
            const badges = [];
            if (comment.isPinned) badges.push('📌 PINNED');
            if (comment.wasEdited) badges.push('✏️ EDITED');
            const badgeStr = badges.length > 0 ? `  [${badges.join(' ')}]` : '';

            text += `[${nameOf(comment)}] - ${formatDate(comment.createdAtTs)}${badgeStr}\n`;

            const reactions = formatReactions(comment.reactionCounts);
            if (reactions) text += `${reactions}\n`;

            text += `\n${cleanHtml(comment.preparedHtmlContent)}\n\n${'-'.repeat(40)}\n\n`;

            for (const reply of comment.replies ?? []) {
                const replyBadge = reply.wasEdited ? '  [✏️ EDITED]' : '';
                text += `  >> [${nameOf(reply)}] - ${formatDate(reply.createdAtTs)}${replyBadge}\n`;

                const replyReactions = formatReactions(reply.reactionCounts);
                if (replyReactions) text += `  ${replyReactions}\n`;

                // Indent continuation lines so the reply body stays aligned.
                text += `\n  ${cleanHtml(reply.preparedHtmlContent).replace(/\n/g, '\n  ')}\n\n  ${'-'.repeat(40)}\n\n`;
            }
        }

        const blob = new Blob([text], { type: 'text/plain;charset=utf-8' });
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = `${sanitizeFilename(state.threadTitle)}.txt`;
        a.style.display = 'none';
        // Some browsers only honour click() on an anchor attached to the page.
        document.body.appendChild(a);
        a.click();
        a.remove();
        // Revoking right after click() can cancel the download before the
        // browser has read the blob, so release the URL a little later.
        setTimeout(() => URL.revokeObjectURL(url), 5000);
    }

    /**
     * Inject the floating control panel (title, status line, progress table,
     * start/stop button and a close icon) into the page. Does nothing when an
     * element with id `md-scraper-ui` already exists.
     */
    function createFloatingUI() {
        if (document.getElementById('md-scraper-ui')) return;

        // Create an element, copy `props` onto it and apply its inline CSS.
        const build = (tag, props, cssText) => {
            const node = document.createElement(tag);
            Object.assign(node, props);
            node.style.cssText = cssText;
            return node;
        };

        const containerCss = `
            position: fixed; bottom: 20px; right: 20px; z-index: 99999;
            background: white; padding: 15px; border-radius: 8px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
            display: flex; flex-direction: column; gap: 10px; width: 220px;
            font-family: sans-serif;
        `;
        const panel = build('div', { id: 'md-scraper-ui' }, containerCss);

        const heading = build(
            'div',
            { textContent: 'MyDealz Scraper' },
            'font-weight: bold; color: #03a5c1; border-bottom: 1px solid #eee; padding-bottom: 5px; font-size: 13px; text-align: center;'
        );

        const status = build(
            'div',
            { id: 'md-scraper-msg', textContent: 'Ready' },
            'font-size: 11px; color: #666; text-align: center; height: 15px;'
        );

        // Table keeps the counters aligned; hidden until updateProgress shows it.
        const progressCss = `
            display: none; width: 100%; font-family: "Courier New", monospace; 
            font-size: 11px; color: #333; border-collapse: collapse;
        `;
        const progressRows = `
            <tr style="height: 18px;">
                <td style="width: 60px;">Comments</td>
                <td style="width: 35px; text-align: right;" id="prog-c-curr">0</td>
                <td style="width: 15px; text-align: center;">/</td>
                <td style="width: 35px; text-align: left;" id="prog-c-total">0</td>
            </tr>
            <tr style="height: 18px;">
                <td>Replies</td>
                <td style="width: 35px; text-align: right;" id="prog-r-curr">0</td>
                <td style="width: 15px; text-align: center;">/</td>
                <td style="width: 35px; text-align: left;" id="prog-r-total">0</td>
            </tr>
        `;
        const progress = build('table', { id: 'md-scraper-progress', innerHTML: progressRows }, progressCss);

        const buttonCss = `
            background: #03a5c1; color: white; border: none; padding: 10px; 
            border-radius: 4px; cursor: pointer; font-weight: bold; 
            text-align: center; width: 100%; transition: background 0.2s;
        `;
        const actionButton = build(
            'button',
            { id: 'md-scraper-btn', textContent: 'Start Scraping', onclick: toggleScraping },
            buttonCss
        );

        const closeCss = `
            position: absolute; top: 8px; right: 10px;
            cursor: pointer; font-size: 18px; color: #999;
            line-height: 1; font-weight: bold;
        `;
        const closeIcon = build('span', { textContent: '×' }, closeCss);
        closeIcon.onmouseover = () => closeIcon.style.color = '#333';
        closeIcon.onmouseout = () => closeIcon.style.color = '#999';
        closeIcon.onclick = () => panel.remove();

        [closeIcon, heading, status, progress, actionButton].forEach((child) => {
            panel.appendChild(child);
        });
        document.body.appendChild(panel);
    }

    /**
     * Refresh the status line and, once a positive comment total is known,
     * reveal and fill the progress table.
     *
     * @param {string} msg - Status text.
     * @param {number} currentComments - Root comments processed so far.
     * @param {number} totalComments - Expected root comments; the table is only
     *   touched when this is greater than 0.
     * @param {number} currentReplies - Replies processed so far.
     * @param {number} totalReplies - Expected replies.
     */
    function updateProgress(msg, currentComments, totalComments, currentReplies, totalReplies) {
        const statusEl = document.getElementById('md-scraper-msg');
        const table = document.getElementById('md-scraper-progress');

        if (statusEl) {
            statusEl.textContent = msg;
        }

        if (!table || !(totalComments > 0)) {
            return;
        }

        table.style.display = 'table';

        const cells = [
            ['prog-c-curr', currentComments],
            ['prog-c-total', totalComments],
            ['prog-r-curr', currentReplies],
            ['prog-r-total', totalReplies],
        ];
        for (const [cellId, value] of cells) {
            document.getElementById(cellId).textContent = value;
        }
    }

    // Bootstrap: build the panel right away if the page has finished loading,
    // otherwise wait for the window's load event.
    if (document.readyState !== 'complete') {
        window.addEventListener('load', createFloatingUI);
    } else {
        createFloatingUI();
    }

})();