Google Maps Reviews Scraper & Exporter (Enhanced)

Scrapes reviews from Google Maps with improved date handling and photo extraction

您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。

您需要先安装一个扩展,例如 篡改猴 或 Userscripts,之后才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。

您需要先安装用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Google Maps Reviews Scraper & Exporter (Enhanced)
// @namespace    http://tampermonkey.net/
// @version      0.16
// @description  Scrapes reviews from Google Maps with improved date handling and photo extraction
// @author       sharmanhall
// @match        https://www.google.com/maps/place/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=google.com
// @grant        GM_addStyle
// @license      MIT 
// ==/UserScript==
 
(function() {
    'use strict';
 
    // Stylesheet for the floating control panel: the fixed-position container,
    // its full-width buttons, and the status/progress area underneath them.
    const panelStyles = `
        #review-scraper-panel {
            position: fixed;
            bottom: 20px;
            right: 20px;
            background-color: #fff;
            border: 1px solid #ccc;
            border-radius: 8px;
            padding: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            z-index: 1000;
            font-family: Arial, sans-serif;
            min-width: 200px;
        }
        #review-scraper-panel button {
            background-color: #4285f4;
            color: white;
            border: none;
            padding: 8px 12px;
            margin: 5px;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
            width: 100%;
        }
        #review-scraper-panel button:hover {
            background-color: #3367d6;
        }
        #scraper-status {
            margin-top: 10px;
            font-size: 12px;
            color: #666;
        }
        .scraper-progress {
            background-color: #e8f0fe;
            border-radius: 4px;
            padding: 5px;
            margin-top: 5px;
        }
    `;
    GM_addStyle(panelStyles);
 
    // Build the floating panel (three action buttons plus an empty status
    // area) and attach it to the page body.
    const panel = Object.assign(document.createElement('div'), {
        id: 'review-scraper-panel',
        innerHTML: `
        <button id="scrape-reviews">Scrape Reviews</button>
        <button id="copy-to-clipboard">Copy to Clipboard</button>
        <button id="export-csv">Export as CSV</button>
        <div id="scraper-status"></div>
    `,
    });
    document.body.appendChild(panel);
 
    /**
     * Replace the contents of the #scraper-status element with one progress line.
     *
     * The message is interpolated into markup and assigned through innerHTML,
     * so any HTML it contains is rendered rather than shown literally.
     *
     * @param {*} message - Text to display; stringified by the template literal.
     */
    const setStatus = (message) => {
        const markup = `<div class="scraper-progress">${message}</div>`;
        document.getElementById('scraper-status').innerHTML = markup;
    };

    /**
     * Convert the date label shown on a review into a raw/estimated pair.
     *
     * Relative labels ("3 weeks ago", "a month ago", "an hour ago") are
     * subtracted from `now`. Labels that do not mention "ago" are handed to the
     * Date constructor as an absolute date. The estimate is the UTC calendar day
     * (YYYY-MM-DD, taken from toISOString) of the resulting instant, or 'unknown'
     * when the label cannot be interpreted.
     *
     * @param {?string} dateText - Label text as scraped; may be empty or nullish.
     * @param {Date} [now=new Date()] - Reference instant for relative labels.
     *     It is copied, never mutated.
     * @returns {{raw_date: string, estimated_date: string}} The trimmed label and
     *     the estimate; both are '' when `dateText` is empty or nullish.
     */
    const parseReviewDate = (dateText, now = new Date()) => {
        if (!dateText) return { raw_date: '', estimated_date: '' };

        const rawDate = dateText.trim();
        let estimatedDate = '';

        // Move `date` back by whole months, clamping the day of month so that
        // e.g. May 31 minus one month is April 30 instead of overflowing to May 1.
        const subtractMonths = (date, months) => {
            const day = date.getDate();
            date.setDate(1);
            date.setMonth(date.getMonth() - months);
            const lastDay = new Date(date.getFullYear(), date.getMonth() + 1, 0).getDate();
            date.setDate(Math.min(day, lastDay));
        };

        try {
            // "a"/"an" stand for a count of one ("a week ago", "an hour ago").
            const relative = rawDate.match(
                /\b(\d+|an?)\s+(second|minute|hour|day|week|month|year)s?\s+ago/i
            );

            if (relative) {
                const [, amount, unit] = relative;
                const num = /^\d+$/.test(amount) ? Number.parseInt(amount, 10) : 1;
                const date = new Date(now);

                switch (unit.toLowerCase()) {
                    case 'second': date.setSeconds(date.getSeconds() - num); break;
                    case 'minute': date.setMinutes(date.getMinutes() - num); break;
                    case 'hour': date.setHours(date.getHours() - num); break;
                    case 'day': date.setDate(date.getDate() - num); break;
                    case 'week': date.setDate(date.getDate() - (num * 7)); break;
                    case 'month': subtractMonths(date, num); break;
                    case 'year': subtractMonths(date, num * 12); break;
                }
                // toISOString throws RangeError on an out-of-range date; the
                // catch below turns that into 'unknown'.
                estimatedDate = date.toISOString().split('T')[0];
            } else if (!rawDate.toLowerCase().includes('ago')) {
                // Not a relative label: try to parse it as an absolute date.
                const parsedDate = new Date(rawDate);
                if (!Number.isNaN(parsedDate.getTime())) {
                    estimatedDate = parsedDate.toISOString().split('T')[0];
                }
            }
        } catch (e) {
            console.warn('Date parsing error:', e);
        }

        return {
            raw_date: rawDate,
            estimated_date: estimatedDate || 'unknown'
        };
    };

    /**
     * Collect the URLs of photos attached to a single review element.
     *
     * Two passes feed one de-duplicated list:
     *   1. buttons matching `button.Tya61d` with an inline background-image
     *      inside the element matching `.KtCyie`;
     *   2. any descendant whose inline style mentions googleusercontent.com.
     * URLs containing "/a-/" or "/a/" are skipped as profile pictures. The second
     * pass additionally requires "googleusercontent.com" in the URL and skips
     * URLs containing "avatar" or "NBa7we". URLs carrying a `=w…-h…` size suffix
     * are rewritten to request `=w1920-h1080-k-no`.
     *
     * @param {Element} reviewDiv - Root element of one review.
     * @returns {string[]} Photo URLs in discovery order, without duplicates.
     */
    const extractReviewPhotos = (reviewDiv) => {
        const found = [];

        // Pull the URL out of a node's inline CSS background-image; null if absent.
        const backgroundUrlOf = (node) => {
            const background = node.style.backgroundImage;
            if (!background) return null;
            const parts = background.match(/url\(["']?(.*?)["']?\)/);
            return (parts && parts[1]) || null;
        };

        // Swap any size suffix for the large rendition, then record the URL once.
        const remember = (url) => {
            let finalUrl = url;
            if (url.includes('=w') && url.includes('-h')) {
                finalUrl = url.replace(/=w\d+-h\d+([^=]*)?/g, '=w1920-h1080-k-no');
            }
            if (!found.includes(finalUrl)) {
                found.push(finalUrl);
            }
        };

        const looksLikeProfilePicture = (url) => url.includes('/a-/') || url.includes('/a/');

        // Pass 1: the dedicated photo strip, when the review has one.
        const gallery = reviewDiv.querySelector('.KtCyie');
        if (gallery) {
            const buttons = gallery.querySelectorAll('button.Tya61d[style*="background-image"]');
            for (const button of buttons) {
                const url = backgroundUrlOf(button);
                if (url !== null && !looksLikeProfilePicture(url)) {
                    remember(url);
                }
            }
        }

        // Pass 2 (backup): anything styled with a googleusercontent.com image.
        for (const node of reviewDiv.querySelectorAll('[style*="googleusercontent.com"]')) {
            const url = backgroundUrlOf(node);
            if (url === null) continue;

            const rejected =
                !url.includes('googleusercontent.com') ||
                looksLikeProfilePicture(url) ||
                url.includes('avatar') ||
                url.includes('NBa7we'); // NBa7we is the profile image class

            if (!rejected) {
                remember(url);
            }
        }

        return found;
    };
 
    /**
     * Scroll to the bottom repeatedly so lazily-loaded reviews get rendered,
     * then scroll back to the top.
     *
     * The reviews pane is looked up as the grandparent of the element matching
     * `.m6QErb[data-value="Sort"]` (presumably the sort control; verify against
     * the live page). If it is not found, the window itself is scrolled. Both
     * paths share one loop: scroll to the current height, wait two seconds, and
     * stop once the height stops growing or the attempt cap is exceeded. The cap
     * applies to the window fallback too, so an endlessly growing page cannot
     * keep the loop running forever.
     *
     * @returns {Promise<void>} Resolves after scrolling back to the top.
     */
    const autoScroll = async () => {
        const SCROLL_SETTLE_MS = 2000;
        const MAX_SCROLL_ATTEMPTS = 50;

        // Scroll to the bottom until the scrollable height stops growing (or
        // the cap is exceeded), then return to the top.
        const scrollUntilStable = async (getHeight, scrollToY) => {
            let attempts = 0;
            let previousHeight = 0;
            let currentHeight = getHeight();

            do {
                previousHeight = currentHeight;
                scrollToY(currentHeight);
                await new Promise((resolve) => setTimeout(resolve, SCROLL_SETTLE_MS));
                currentHeight = getHeight();
                attempts++;

                setStatus(`Loading reviews... (attempt ${attempts})`);

                // Prevent infinite loops
                if (attempts > MAX_SCROLL_ATTEMPTS) break;
            } while (currentHeight > previousHeight);

            scrollToY(0);
        };

        setStatus('Auto-scrolling to load all reviews...');

        // Try to find and scroll within the reviews container
        const reviewsContainer = document.querySelector('.m6QErb[data-value="Sort"]')?.parentElement?.parentElement;

        if (reviewsContainer) {
            await scrollUntilStable(
                () => reviewsContainer.scrollHeight,
                (y) => reviewsContainer.scrollTo(0, y)
            );
        } else {
            // Fallback to window scrolling
            await scrollUntilStable(
                () => document.documentElement.scrollHeight,
                (y) => window.scrollTo(0, y)
            );
        }
    };
 
    /**
     * Click every "More" button so truncated review text is fully rendered.
     *
     * Several selectors are tried because the button markup varies. A button is
     * clicked only when its text or aria-label mentions "more", and at most once
     * even if it matches several selectors, so a second click cannot act on an
     * already-expanded review. A short pause follows each click.
     *
     * @returns {Promise<void>} Resolves once all matching buttons were clicked.
     */
    const expandReviews = async () => {
        const CLICK_SETTLE_MS = 300;

        setStatus('Expanding truncated reviews...');

        // Multiple selectors for "More" buttons
        const moreButtonSelectors = [
            "button.w8nwRe.kyuRq",
            "button[aria-label*='more']",
            "button.review-more-link",
            ".review-full-text button"
        ];

        const mentionsMore = (text) => Boolean(text?.toLowerCase().includes('more'));
        const alreadyClicked = new Set();

        for (const selector of moreButtonSelectors) {
            for (const button of document.querySelectorAll(selector)) {
                if (alreadyClicked.has(button)) continue;
                if (!mentionsMore(button.textContent) &&
                    !mentionsMore(button.getAttribute('aria-label'))) {
                    continue;
                }

                alreadyClicked.add(button);
                button.click();
                await new Promise((resolve) => setTimeout(resolve, CLICK_SETTLE_MS));
            }
        }
    };
 
    /**
     * Load, expand and extract every review currently reachable on the page.
     *
     * Runs autoScroll and expandReviews first, then walks each
     * `div[data-review-id]` element, skipping ids already seen, and builds one
     * record per review. The result is also stored on `window.scrapedReviews`
     * so the copy/export actions can reuse it.
     *
     * @returns {Promise<Object[]>} One record per unique review id, with the keys
     *     reviewer_name, profile_img_url, review_date_raw, review_date_estimated,
     *     star_rating, review_url, review_content, review_photos, review_id and
     *     scraped_at.
     */
    const scrapeReviews = async () => {
        await autoScroll();
        await expandReviews();

        setStatus('Extracting review data...');

        const reviewNodes = document.querySelectorAll("div[data-review-id]");
        const total = reviewNodes.length;
        const seenIds = new Set();
        const collected = [];

        // Trimmed text of the first descendant matching `selector`, or ''.
        const trimmedText = (root, selector) =>
            root.querySelector(selector)?.textContent.trim() || '';

        for (let index = 0; index < total; index += 1) {
            const node = reviewNodes[index];
            const id = node.getAttribute("data-review-id");

            // The same review id can appear on more than one element.
            if (seenIds.has(id)) continue;
            seenIds.add(id);

            setStatus(`Processing review ${index + 1} of ${total}...`);

            // Date label, e.g. "2 weeks ago"
            const dateLabel = node.querySelector("span.rsqaWe")?.textContent;
            const { raw_date: rawDate, estimated_date: estimatedDate } = parseReviewDate(dateLabel);

            const photos = extractReviewPhotos(node);

            // The star count is the first number in the rating's aria-label.
            const ratingDigits = node
                .querySelector("span.kvMYJc[role='img']")
                ?.getAttribute("aria-label")
                ?.match(/(\d+)/);
            const stars = ratingDigits ? Number.parseInt(ratingDigits[0], 10) : 0;

            const reviewerName = trimmedText(node, "div.d4r55");

            // Swap the thumbnail size suffix for a larger avatar rendition.
            const avatarSrc = node.querySelector("img.NBa7we")?.src;
            const avatarUrl = avatarSrc
                ? avatarSrc.replace('=w36-h36-p-rp-mo-br100', '=s200')
                : '';

            const shareUrl = node.querySelector("button[data-href]")?.getAttribute("data-href") || '';
            const body = trimmedText(node, "span.wiI7pd");

            collected.push({
                reviewer_name: reviewerName,
                profile_img_url: avatarUrl,
                review_date_raw: rawDate,
                review_date_estimated: estimatedDate,
                star_rating: stars,
                review_url: shareUrl,
                review_content: body,
                review_photos: photos,
                review_id: id,
                scraped_at: new Date().toISOString()
            });
        }

        setStatus(`✅ Successfully scraped ${collected.length} reviews with enhanced data!`);
        window.scrapedReviews = collected; // Store globally for other functions
        return collected;
    };
 
    /**
     * Copy the scraped reviews to the clipboard as pretty-printed JSON.
     *
     * Reuses `window.scrapedReviews` when present; otherwise runs a scrape
     * first. Clipboard failures are logged and reported in the status area
     * instead of being thrown.
     *
     * @returns {Promise<void>}
     */
    const copyToClipboard = async () => {
        let reviews = window.scrapedReviews;
        if (!reviews) {
            reviews = await scrapeReviews();
        }

        const json = JSON.stringify(reviews, null, 2);

        try {
            await navigator.clipboard.writeText(json);
            setStatus(`✅ ${reviews.length} reviews copied to clipboard as JSON!`);
        } catch (clipboardError) {
            setStatus('❌ Error copying to clipboard. Check console.');
            console.error("Could not copy content to clipboard: ", clipboardError);
        }
    };

    /**
     * Download the scraped reviews as a CSV file named
     * `google_maps_reviews_<YYYY-MM-DD>.csv`.
     *
     * Reuses `window.scrapedReviews` when present; otherwise runs a scrape
     * first. Array values (the photo list) are joined with "; ". Missing values
     * are written as empty cells. A cell is quoted, with embedded quotes
     * doubled, when it contains a comma, a quote, or a CR/LF character. The
     * temporary object URL backing the download is revoked afterwards so the
     * blob is not kept alive for the lifetime of the page.
     *
     * @returns {Promise<void>}
     */
    const exportAsCSV = async () => {
        // Delay before releasing the blob URL, so the browser can start the
        // download before the URL becomes invalid.
        const REVOKE_DELAY_MS = 1000;

        const reviews = window.scrapedReviews || await scrapeReviews();

        // CSV headers
        const headers = [
            'reviewer_name',
            'profile_img_url',
            'review_date_raw',
            'review_date_estimated',
            'star_rating',
            'review_url',
            'review_content',
            'review_photos',
            'review_id',
            'scraped_at'
        ];

        // Render one value as a CSV cell.
        const toCsvField = (value) => {
            const flat = Array.isArray(value) ? value.join('; ') : value;
            const text = String(flat ?? '');
            return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
        };

        // Convert to CSV
        const csvContent = [
            headers.join(','),
            ...reviews.map((review) =>
                headers.map((header) => toCsvField(review[header])).join(',')
            )
        ].join('\n');

        // Download CSV file through a temporary, hidden link
        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.setAttribute('href', url);
        link.setAttribute('download', `google_maps_reviews_${new Date().toISOString().split('T')[0]}.csv`);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        try {
            link.click();
        } finally {
            document.body.removeChild(link);
            setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
        }

        setStatus(`✅ CSV file downloaded with ${reviews.length} reviews!`);
    };
 
    // Event listeners
    // The handlers are async, so a rejection would otherwise go unhandled and
    // leave the status area stuck on an in-progress message. Each handler is
    // wrapped so failures are logged and surfaced in the panel. The wrapper
    // also keeps the click event from being passed through as an argument.
    const runAction = (action) => async () => {
        try {
            await action();
        } catch (err) {
            console.error('Review scraper action failed:', err);
            setStatus('❌ Something went wrong. Check console.');
        }
    };

    document.getElementById('scrape-reviews').addEventListener('click', runAction(scrapeReviews));
    document.getElementById('copy-to-clipboard').addEventListener('click', runAction(copyToClipboard));
    document.getElementById('export-csv').addEventListener('click', runAction(exportAsCSV));

    // Initialize
    setStatus('Ready to scrape reviews! 🚀');
})();