douyin-user-data-download

下载抖音用户主页数据!

目前為 2024-06-13 提交的版本,檢視 最新版本

// ==UserScript==
// @name         douyin-user-data-download
// @namespace    http://tampermonkey.net/
// @version      0.3.4
// @description  下载抖音用户主页数据!
// @author       xxmdmst
// @match        https://www.douyin.com/*
// @icon         https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
// @grant        none
// @require      https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
// @license MIT
// ==/UserScript==

(function () {
    // Lookup from UTF-16 code unit to packed GBK code; built on first use by initGbkTable().
    // A value of 0xFFFF marks a code unit that has no GBK mapping.
    let table;

    /**
     * Builds the Unicode -> GBK lookup stored in `table`.
     *
     * Every double-byte GBK code in the ranges below is written into a Uint16Array, the raw
     * bytes of that array are decoded with TextDecoder('gbk'), and the resulting characters
     * are used as indices pointing back at the code that produced them.
     */
    function initGbkTable() {
        // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
        // Each row: [first byte from, first byte to, second byte from, second byte to].
        const byteRanges = [
            [0xA1, 0xA9, 0xA1, 0xFE],
            [0xB0, 0xF7, 0xA1, 0xFE],
            [0x81, 0xA0, 0x40, 0xFE],
            [0xAA, 0xFE, 0x40, 0xA0],
            [0xA8, 0xA9, 0x40, 0xA0],
            [0xAA, 0xAF, 0xA1, 0xFE],
            [0xF8, 0xFE, 0xA1, 0xFE],
            [0xA1, 0xA7, 0x40, 0xA0],
        ];
        const packedCodes = new Uint16Array(23940);
        let filled = 0;

        byteRanges.forEach(([firstLo, firstHi, secondLo, secondHi]) => {
            for (let second = secondLo; second <= secondHi; second++) {
                // 0x7F is never a valid second byte.
                if (second === 0x7F) {
                    continue;
                }
                for (let first = firstLo; first <= firstHi; first++) {
                    // The first byte sits in the low half so that it comes first in memory
                    // when the array's bytes are decoded (assumes a little-endian host).
                    packedCodes[filled] = (second << 8) | first;
                    filled += 1;
                }
            }
        });

        table = new Uint16Array(65536).fill(0xFFFF);
        const decoded = new TextDecoder('gbk').decode(packedCodes);
        for (let pos = 0; pos < decoded.length; pos++) {
            table[decoded.charCodeAt(pos)] = packedCodes[pos];
        }
    }

    /**
     * Encodes a string as GBK bytes.
     *
     * @param {string} str - Text to encode.
     * @param {Object} [opt]
     * @param {function(number)} [opt.onAlloc] - Allocates the output buffer for the given byte
     *     length; defaults to Buffer.allocUnsafe when a Buffer global exists, else Uint8Array.
     * @param {function(number, string): number} [opt.onError] - Called with (index, str) for a
     *     code unit that has no mapping. Returning -1 stops encoding; any other value is written
     *     as one byte, or as two bytes (low byte first) when it is greater than 0xFF.
     *     Defaults to returning 63 ('?').
     * @returns The part of the allocated buffer that was written (via subarray).
     */
    function str2gbk(str, opt = {}) {
        if (!table) {
            initGbkTable();
        }
        const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
        const allocate = opt.onAlloc || (nodeAlloc
            ? (size) => nodeAlloc(size)
            : (size) => new Uint8Array(size));
        const handleUnmapped = opt.onError || (() => 63);

        // Two bytes per UTF-16 code unit is the worst case.
        const out = allocate(str.length * 2);
        let written = 0;
        const emit = (value) => {
            out[written] = value;
            written += 1;
        };

        for (let pos = 0; pos < str.length; pos++) {
            const unit = str.charCodeAt(pos);
            if (unit < 0x80) {
                emit(unit);
                continue;
            }
            const packed = table[unit];
            if (packed !== 0xFFFF) {
                emit(packed);
                emit(packed >> 8);
                continue;
            }
            // The euro sign (U+20AC) is written as the single byte 0x80.
            if (unit === 8364) {
                emit(0x80);
                continue;
            }
            const replacement = handleUnmapped(pos, str);
            if (replacement === -1) {
                break;
            }
            emit(replacement);
            if (replacement > 0xFF) {
                emit(replacement >> 8);
            }
        }
        return out.subarray(0, written);
    }

    // Works captured so far; interceptResponse() appends one object per work from each
    // intercepted post-list response.
    let aweme_list = [];
    // Labels for the profile fields. The order matches the values that interceptResponse()
    // pushes into userData, so userKey[i] labels userData[i].
    let userKey = [
        "昵称", "关注", "粉丝", "获赞",
        "抖音号", "IP属地", "性别",
        "位置", "签名", "作品数", "主页"
    ];
    // Profile values of the viewed user, filled by interceptResponse(); empty until then.
    let userData = [];
    // Handle of the pending setTimeout created by flush().
    let timer;

    /**
     * Copies the captured profile fields to the clipboard as "label:value" lines and reports
     * the outcome in `node`.
     *
     * @param {{textContent: string}} node - Element whose text shows the result; it is cleared
     *     again two seconds after the clipboard operation settles.
     */
    function copyUserData(node) {
        // userData starts out as an empty array, which is truthy, so emptiness has to be
        // checked through its length.
        if (!userData || userData.length === 0) {
            alert("未抓取到用户数据!");
            return;
        }
        const lines = userKey.map((key, index) => key + ":" + userData[index]);
        navigator.clipboard.writeText(lines.join("\n")).then(() => {
            node.textContent = "复制成功";
        }).catch(() => {
            node.textContent = "复制失败";
        }).then(() => {
            // Start the clear timer only once the result is visible, so a slow clipboard
            // write cannot have its message wiped before it appears.
            setTimeout(() => {
                node.textContent = '';
            }, 2000);
        });
    }

    /**
     * Creates a half-transparent button pinned to the right edge of its positioned parent.
     *
     * @param {string} text - Button label.
     * @param {string} top - CSS `top` offset, e.g. "21px".
     * @param {function(Event)} func - Click handler.
     * @returns The new button element (not yet attached to the document).
     */
    function createVideoButton(text, top, func) {
        const overlayButton = document.createElement("button");
        overlayButton.textContent = text;
        Object.assign(overlayButton.style, {
            position: "absolute",
            right: "0px",
            top: top,
            opacity: "0.5",
        });
        overlayButton.addEventListener("click", func);
        return overlayButton;
    }

    /**
     * Opens `url` in a new tab by clicking a temporary anchor element.
     *
     * @param {string} url - Address to open.
     */
    function openLink(url) {
        const anchor = Object.assign(document.createElement('a'), {
            href: url,
            target: "_blank",
        });
        // The anchor only lives in the document for the duration of the click.
        document.body.appendChild(anchor);
        anchor.click();
        document.body.removeChild(anchor);
    }

    /**
     * Builds the file-name stem for a work's downloads: the first 20 characters of its
     * description with characters that are not allowed in file names removed, or the work id
     * when the description is empty.
     */
    function awemeFileStem(aweme) {
        return aweme.desc ? aweme.desc.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") : aweme.awemeId;
    }

    /**
     * Adds "open source", "download" and (for image posts) "zip images" buttons to every work
     * card in the post list that has not been processed yet.
     *
     * The card at position i is paired with aweme_list[i]. Cards for which no work has been
     * captured are skipped and left unmarked, so a later call can pick them up.
     */
    function createEachButton() {
        const targetNodes = document.querySelectorAll("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] > li a");
        for (let i = 0; i < targetNodes.length; i++) {
            const targetNode = targetNodes[i];
            if (targetNode.dataset.added) {
                continue;
            }
            const aweme = aweme_list[i];
            if (!aweme) {
                // The page can show more cards than works captured so far; dereferencing the
                // missing entry would throw and abort the whole loop.
                continue;
            }
            targetNode.appendChild(createVideoButton("打开视频源", "0px", (event) => {
                event.preventDefault();
                event.stopPropagation();
                openLink(aweme.url);
            }));
            const downloadVideoButton = createVideoButton("下载视频", "21px", (event) => {
                event.preventDefault();
                event.stopPropagation();
                const xhr = new XMLHttpRequest();
                xhr.open('GET', aweme.url.replace("http://", "https://"), true);
                xhr.responseType = 'blob';
                xhr.onload = () => {
                    const a = document.createElement('a');
                    a.href = window.URL.createObjectURL(xhr.response);
                    // Image posts get an .mp3 extension for the downloaded file.
                    a.download = awemeFileStem(aweme) + (aweme.images ? ".mp3" : ".mp4");
                    a.click();
                };
                xhr.onerror = () => {
                    downloadVideoButton.textContent = "下载失败";
                };
                xhr.onprogress = (progress) => {
                    if (progress.lengthComputable) {
                        downloadVideoButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
                    }
                };
                xhr.send();
            });
            targetNode.appendChild(downloadVideoButton);
            if (aweme.images) {
                targetNode.appendChild(createVideoButton("图片打包下载", "42px", (event) => {
                    event.preventDefault();
                    event.stopPropagation();
                    const zip = new JSZip();
                    // The download button's label doubles as the status line for the zip job.
                    downloadVideoButton.textContent = "下载并打包中...";
                    const pending = aweme.images.map((link, index) => {
                        return fetch(link)
                            .then((response) => {
                                if (!response.ok) {
                                    throw new Error("HTTP " + response.status + " for " + link);
                                }
                                return response.arrayBuffer();
                            })
                            .then((buffer) => {
                                zip.file(`image_${index + 1}.jpg`, buffer);
                            });
                    });
                    Promise.all(pending)
                        .then(() => zip.generateAsync({type: "blob"}))
                        .then((content) => {
                            const link = document.createElement("a");
                            link.href = URL.createObjectURL(content);
                            link.download = awemeFileStem(aweme) + ".zip";
                            link.click();
                            downloadVideoButton.textContent = "图片打包完成";
                        })
                        .catch((error) => {
                            console.error("image zip download failed", error);
                            downloadVideoButton.textContent = "图片打包失败";
                        });
                }));
            }
            targetNode.dataset.added = true;
        }
    }

    /**
     * Refreshes the counters and the status message, and (re)schedules createEachButton()
     * to run 500 ms from now. A previously scheduled run is cancelled first, so repeated
     * calls in quick succession result in a single run.
     */
    function flush() {
        if (timer !== undefined) {
            clearTimeout(timer);
        }
        timer = setTimeout(createEachButton, 500);

        const loadedCount = aweme_list.length;
        data_button.p2.textContent = String(loadedCount);
        const imagePostCount = aweme_list.reduce((count, item) => (item.images ? count + 1 : count), 0);
        dimg_button.p2.textContent = String(imagePostCount);
        msg_pre.textContent = `已加载${loadedCount}个作品,${imagePostCount}个图文\n激活上方头像可展开下载按钮`;
    }

    // Set when works were captured before the header buttons existed; the start-up code calls
    // flush() once the buttons have been created.
    let flag = false;

    /**
     * Converts one raw item of a post-list response into the compact record kept in aweme_list.
     * Missing statistics, video or description fields yield undefined / empty values rather
     * than throwing.
     */
    function parseAwemeItem(item) {
        const statistics = item.statistics || {};
        const playAddr = item.video && item.video.play_addr;
        const urlList = (playAddr && playAddr.url_list) || [];
        return {
            "awemeId": item.aweme_id,
            // Keep ASCII, CJK ideographs and full-width forms; any other run becomes one space.
            "desc": (item.desc || "").replace(/[^\x00-\x7F\u4E00-\u9FFF\uFF00-\uFFEF]+/g, " ").trim(),
            "diggCount": statistics.digg_count,
            "commentCount": statistics.comment_count,
            "collectCount": statistics.collect_count,
            "shareCount": statistics.share_count,
            "date": new Date(item.create_time * 1000).toLocaleString(),
            "url": urlList[0],
            // The last entry of each image's url_list is used.
            "images": item.images ? item.images.map(row => row.url_list[row.url_list.length - 1]) : null
        };
    }

    /**
     * Patches XMLHttpRequest.prototype.send so that completed post-list and profile responses
     * are captured into aweme_list and userData.
     *
     * The hook is attached with addEventListener, so a handler the page assigned to
     * onreadystatechange keeps working. A response that cannot be parsed is logged and ignored.
     */
    function interceptResponse() {
        const originalSend = XMLHttpRequest.prototype.send;
        XMLHttpRequest.prototype.send = function () {
            const self = this;
            this.addEventListener("readystatechange", function () {
                if (self.readyState !== 4) {
                    return;
                }
                // `_url` is not set anywhere in this script (presumably the page's own XHR
                // wrapper records it), so fall back to the standard responseURL.
                const requestUrl = String(self._url || self.responseURL || "");
                const isPostList = requestUrl.indexOf("/aweme/v1/web/aweme/post") > -1;
                const isProfile = !isPostList && requestUrl.indexOf("/aweme/v1/web/user/profile/other") > -1;
                if (!isPostList && !isProfile) {
                    return;
                }
                let json;
                try {
                    // With responseType "json" the response is already an object.
                    json = typeof self.response === "string" ? JSON.parse(self.response) : self.response;
                } catch (error) {
                    console.error("douyin-user-data-download: unreadable response for " + requestUrl, error);
                    return;
                }
                if (!json) {
                    return;
                }
                if (isPostList) {
                    if (!Array.isArray(json.aweme_list)) {
                        return;
                    }
                    aweme_list.push(...json.aweme_list.map(parseAwemeItem));
                    if (domLoadedTimer === null) {
                        flush();
                    } else {
                        // Buttons are not ready yet; remember to flush once they are.
                        flag = true;
                    }
                    return;
                }
                const userInfo = json.user;
                if (!userInfo) {
                    return;
                }
                const city = userInfo.city ? userInfo.city : '';
                const district = userInfo.district ? userInfo.district : '';
                // Replace the previous profile instead of appending, so the indices keep
                // lining up with userKey when the profile request is seen more than once.
                userData.length = 0;
                userData.push(
                    userInfo.nickname, userInfo.following_count, userInfo.mplatform_followers_count,
                    userInfo.total_favorited, '\t' + (userInfo.unique_id ? userInfo.unique_id : userInfo.short_id), userInfo.ip_location, userInfo.gender === 2 ? "女" : "男",
                    `${city}·${district}`, '"' + (userInfo.signature ? userInfo.signature : '') + '"', userInfo.aweme_count, "https://www.douyin.com/user/" + userInfo.sec_uid
                );
            });
            return originalSend.apply(this, arguments);
        };
    }

    /**
     * Offers `txt` to the user as a file download.
     *
     * @param {string|Uint8Array} txt - File content; passed to Blob as-is.
     * @param {string} filename - Suggested name. The part before the extension is cut to 20
     *     characters and stripped of characters that are not allowed in file names; the
     *     extension itself is always kept.
     */
    function txt2file(txt, filename) {
        const blob = new Blob([txt], {type: 'text/plain'});
        const url = URL.createObjectURL(blob);
        // Split off the extension first: cutting the whole name to 20 characters would drop
        // ".csv" for long nicknames.
        const extensionMatch = /\.[A-Za-z0-9]{1,8}$/.exec(filename);
        const extension = extensionMatch ? extensionMatch[0] : "";
        const stem = extension ? filename.slice(0, -extension.length) : filename;
        const link = document.createElement('a');
        link.href = url;
        link.download = stem.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") + extension;
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        URL.revokeObjectURL(url);
    }

    /**
     * Formats one value as a CSV field.
     *
     * null/undefined become an empty field (as Array.prototype.join renders them). A value
     * that is already wrapped in double quotes keeps its wrapper and has the quotes inside it
     * doubled. Any other value is quoted only when it contains a quote, comma or line break.
     */
    function csvField(value) {
        if (value === null || value === undefined) {
            return "";
        }
        const text = String(value);
        const preQuoted = text.length >= 2 && text[0] === '"' && text[text.length - 1] === '"';
        const inner = preQuoted ? text.slice(1, -1) : text;
        if (!preQuoted && !/[",\r\n]/.test(inner)) {
            return text;
        }
        return '"' + inner.replace(/"/g, '""') + '"';
    }

    /**
     * Exports the captured profile and works as a CSV file named after the first userData
     * entry.
     *
     * @param {string} encoding - "gbk" converts the text with str2gbk(); any other value
     *     leaves it as a string.
     */
    function downloadData(encoding) {
        let text = userKey.join(",") + "\n" + userData.map(csvField).join(",") + "\n\n";
        text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
        aweme_list.forEach(item => {
            // The description is always quoted; csvField doubles any quotes inside it. The
            // locale-formatted date can contain a comma and is quoted when it does.
            text += ['"' + item.desc + '"', item.diggCount, item.commentCount,
                item.collectCount, item.shareCount, item.date, item.url].map(csvField).join(",") + "\n";
        });
        if (encoding === "gbk") {
            text = str2gbk(text);
        }
        txt2file(text, userData[0] + ".csv");
    }

    // Header buttons (assigned by createAllButton) and the floating status box.
    let dimg_button;
    let data_button;
    let scroll_button;
    let msg_pre;

    /**
     * Creates the fixed, half-transparent status box near the top-right corner of the page
     * and stores it in msg_pre.
     */
    function createMsgBox() {
        msg_pre = document.createElement('pre');
        msg_pre.textContent = '等待上方头像加载完毕';
        Object.assign(msg_pre.style, {
            color: 'white',
            position: 'fixed',
            right: '5px',
            top: '60px',
            zIndex: '90000',
            opacity: "0.5",
        });
        document.body.appendChild(msg_pre);
    }

    /**
     * Clones the menu-entry template and fills in its two text lines.
     * The paragraphs are exposed on the clone as `p1` (title) and `p2` (counter/status).
     */
    function cloneMenuButton(baseNode, title, counter) {
        const button = baseNode.cloneNode(true);
        button.p1 = button.querySelector("p:nth-child(1)");
        button.p2 = button.querySelector("p:nth-child(2)");
        button.p1.textContent = title;
        button.p2.textContent = counter;
        return button;
    }

    /**
     * Adds the script's entries to the header menu by cloning an existing menu link.
     *
     * Assigns dimg_button, data_button and scroll_button. Each entry is inserted directly
     * after the template link, so the entry created last ends up first.
     */
    function createAllButton() {
        const dom = document.querySelector("#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a:nth-last-child(1)");
        const baseNode = dom.cloneNode(true);
        baseNode.removeAttribute("target");
        baseNode.removeAttribute("rel");
        baseNode.removeAttribute("href");
        const svgChild = baseNode.querySelector("svg");
        // querySelector can match a nested <svg>; removeChild on the clone would throw for a
        // node that is not its direct child, so detach it from wherever it sits.
        if (svgChild) svgChild.remove();

        dimg_button = cloneMenuButton(baseNode, "图文打包下载", "0");
        dom.after(dimg_button);
        dimg_button.addEventListener('click', downloadImg);

        data_button = cloneMenuButton(baseNode, "下载已加载的数据", "0");
        const label = document.createElement('label');
        label.setAttribute('for', 'gbk');
        label.innerText = 'gbk';
        data_button.p1.after(label);
        const checkbox = document.createElement('input');
        checkbox.setAttribute('type', 'checkbox');
        checkbox.setAttribute('id', 'gbk');
        // Inserted after the title as well, so it lands in front of the label.
        data_button.p1.after(checkbox);
        dom.after(data_button);
        // Toggling the encoding option must not trigger the download click on the entry.
        const stopPropagation = (event) => event.stopPropagation();
        label.addEventListener('click', stopPropagation);
        checkbox.addEventListener('click', stopPropagation);
        data_button.addEventListener('click', () => downloadData(checkbox.checked ? "gbk" : "utf-8"));

        // Its click handler is attached by scrollPageToBottom().
        scroll_button = cloneMenuButton(baseNode, "开启自动下拉到底", "");
        dom.after(scroll_button);

        const copyUserData_button = cloneMenuButton(baseNode, "复制作者信息", "");
        dom.after(copyUserData_button);
        copyUserData_button.addEventListener('click', () => copyUserData(copyUserData_button.p2));
    }

    /**
     * Wires the auto-scroll toggle onto scroll_button.
     *
     * While active, the page is scrolled to the bottom every 1200 ms until either the number
     * of captured works reaches the profile's work count (userData[9]) or the element that
     * follows the post list shows some text.
     */
    function scrollPageToBottom() {
        let scrollInterval;

        function scrollLoop() {
            const endNode = document.querySelector("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] + div div");
            // The element may be missing from the page; treat that as "no end text yet"
            // instead of throwing on every tick.
            const endText = endNode ? endNode.innerText : "";
            if (aweme_list.length < userData[9] && !endText) {
                scrollTo(0, document.body.scrollHeight);
            } else {
                clearInterval(scrollInterval);
                scrollInterval = null;
                scroll_button.p1.textContent = "已加载全部!";
            }
        }

        scroll_button.addEventListener('click', () => {
            if (!scrollInterval) {
                scrollInterval = setInterval(scrollLoop, 1200);
                scroll_button.p1.textContent = "停止自动下拉";
            } else {
                clearInterval(scrollInterval);
                scrollInterval = null;
                scroll_button.p1.textContent = "开启自动下拉";
            }
        });
    }

    /**
     * Downloads the images of every captured image post and offers them as one zip file,
     * with one numbered folder per post.
     *
     * Progress and the final outcome are shown in msg_pre. A failed image request or zip
     * step is logged and reported there instead of surfacing as an unhandled rejection.
     */
    async function downloadImg() {
        const imagePosts = aweme_list.filter(a => a.images);
        if (imagePosts.length === 0) {
            alert("当前页面未发现图文链接");
            return;
        }
        const zip = new JSZip();
        try {
            // Posts are processed one after another; the images of a post are fetched together.
            for (const [index, aweme] of imagePosts.entries()) {
                msg_pre.textContent = `${index + 1}.${(aweme.desc || "").slice(0, 20)}...`;
                const folder = zip.folder((index + 1) + "." + (aweme.desc ? aweme.desc.replace(/[\/:*?"<>|\s]/g, "").slice(0, 20).replace(/[.\d]+$/g, "") : aweme.awemeId));
                await Promise.all(aweme.images.map((link, imageIndex) => {
                    return fetch(link)
                        .then((res) => {
                            if (!res.ok) {
                                throw new Error("HTTP " + res.status + " for " + link);
                            }
                            return res.arrayBuffer();
                        })
                        .then((buffer) => {
                            folder.file(`image_${imageIndex + 1}.jpg`, buffer);
                        });
                }));
            }
            msg_pre.textContent = "图片打包中...";
            const content = await zip.generateAsync({type: "blob"});
            const link = document.createElement("a");
            link.href = URL.createObjectURL(content);
            // The profile may not have been captured; fall back to a fixed name.
            link.download = String(userData[0] || "douyin").slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") + ".zip";
            link.click();
            msg_pre.textContent = "图片打包完成";
        } catch (error) {
            console.error("douyin-user-data-download: image zip failed", error);
            msg_pre.textContent = "图片打包失败";
        }
    }

    /**
     * Adds a "下载" entry to the control bar of the video player and keeps it in place.
     *
     * run() is executed every 500 ms. Each pass picks an autoplay <video> element, derives an
     * id for it, and either removes stale download entries (when one for this id already
     * exists) or clones the player's clarity-setting control into a new entry whose menu item
     * opens the video address in a new window.
     */
    function douyinVideoDownloader() {
        function run() {
            let downloadOption = [{name: '打开视频源', id: 'toLink'}];
            let videoElements = document.querySelectorAll('video');
            if (videoElements.length === 0) return;
            // Pick out the <video> elements that carry the autoplay attribute
            let playVideoElements = [];
            videoElements.forEach(function (element) {
                let autoplay = element.getAttribute('autoplay');
                if (autoplay !== null) {
                    playVideoElements.push(element);
                }
            })
            // With "modal_id" in the address the first autoplay video is used, otherwise the last one.
            let videoContainer = location.href.indexOf('modal_id') !== -1
                ? playVideoElements[0]
                : playVideoElements[playVideoElements.length - 1];
            if (!videoContainer) return;
            // Get the video playback address (first child's src if present, else the element's own src)
            let url = videoContainer && videoContainer.children.length > 0 && videoContainer.children[0].src
                ? videoContainer.children[0].src
                : videoContainer.src;
            // Get the video ID, used to build the custom element ids below
            let videoId;
            let resp = url.match(/^(https:)?\/\/.+\.com\/([a-zA-Z0-9]+)\/[a-zA-Z0-9]+\/video/);
            let res = url.match(/blob:https:\/\/www.douyin.com\/(.*)/);
            if (resp && resp[2]) {
                videoId = resp[2];
            } else if (res && res[1]) {
                videoId = res[1]
            } else {
                // Neither pattern matched: fall back to the element's data-xgplayerid attribute.
                videoId = videoContainer.getAttribute('data-xgplayerid')
            }
            let playContainer = videoContainer.parentNode.parentNode.querySelector('.xg-right-grid');
            if (!playContainer) return;
            // When browsing videos from a profile page several buttons can appear;
            // remove the ones not needed and keep only the one for the current video
            let videoDownloadDom = playContainer.querySelector('#scriptVideoDownload' + videoId);
            if (videoDownloadDom) {
                let dom = playContainer.querySelectorAll('.xgplayer-playclarity-setting');
                dom.forEach(function (d) {
                    let btn = d.querySelector('.btn');
                    if (d.id !== 'scriptVideoDownload' + videoId && btn.innerText === '下载') {
                        d.parentNode.removeChild(d);
                    }
                });
                return;
            }
            if (videoContainer && playContainer) {
                let playClarityDom = playContainer.querySelector('.xgplayer-playclarity-setting');
                if (!playClarityDom) return;

                let palyClarityBtn = playClarityDom.querySelector('.btn');
                if (!palyClarityBtn) return;

                // The new entry is a deep copy of the clarity-setting control.
                let downloadDom = playClarityDom.cloneNode(true);
                downloadDom.setAttribute('id', 'scriptVideoDownload' + videoId);

                // A different inline style is used when the address contains "search".
                if (location.href.indexOf('search') === -1) {
                    downloadDom.style = 'margin-top:-68px;padding-top:100px;padding-left:20px;padding-right:20px;';
                } else {
                    downloadDom.style = 'margin-top:0px;padding-top:100px;';
                }

                let downloadText = downloadDom.querySelector('.btn');
                downloadText.innerText = '下载';
                downloadText.style = 'font-size:14px;font-weight:600;';
                downloadText.setAttribute('id', 'zhmDouyinDownload' + videoId);
                let detail = playContainer.querySelector('xg-icon:nth-of-type(1)').children[0];
                let linkUrl = detail.getAttribute('href') ? detail.getAttribute('href') : location.href;

                if (linkUrl.indexOf('www.douyin.com') === -1) {
                    linkUrl = '//www.douyin.com' + linkUrl;
                }

                downloadText.setAttribute('data-url', linkUrl);
                downloadText.removeAttribute('target');
                downloadText.setAttribute('href', 'javascript:void(0);');

                // Show the cloned menu (".virtual") while the pointer is over the entry.
                let virtualDom = downloadDom.querySelector('.virtual');
                downloadDom.onmouseover = function () {
                    if (location.href.indexOf('search') === -1) {
                        virtualDom.style = 'display:block !important';
                    } else {
                        virtualDom.style = 'display:block !important;margin-bottom:37px;';
                    }
                }

                downloadDom.onmouseout = function () {
                    virtualDom.style = 'display:none !important';
                }

                // Replace the cloned menu's content with the script's own option(s).
                let downloadHtml = '';
                downloadOption.forEach(function (item) {
                    if (item.id === "toLink") {
                        downloadHtml += `<div style="text-align:center;" class="item ${item.id}" id="${item.id}${videoId}">${item.name}</div>`;
                    }
                })
                if (downloadDom.querySelector('.virtual')) {
                    downloadDom.querySelector('.virtual').innerHTML = downloadHtml;
                }
                playClarityDom.after(downloadDom);
                // Open directly
                let toLinkDom = playContainer.querySelector('#toLink' + videoId);
                if (toLinkDom) {
                    toLinkDom.addEventListener('click', function () {
                        // A blob: address cannot be opened in a new window; tell the user instead.
                        if (url.match(/^blob/)) {
                            alert("加密视频地址,无法直接打开");
                        } else {
                            window.open(url);
                        }
                    })
                }
            }
        }

        setInterval(run, 500);
    }

    if (document.title === "验证码中间页") {
        return
    }
    createMsgBox();
    interceptResponse();
    let domLoadedTimer;
    const checkElementLoaded = () => {
        const element = document.querySelector('#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a');
        if (element) {
            console.log('Node has been loaded.');
            clearInterval(domLoadedTimer);
            domLoadedTimer = null;
            createAllButton();
            douyinVideoDownloader();
            scrollPageToBottom();
            if (flag) flush();
        }
    };
    window.onload = () => {
        domLoadedTimer = setInterval(checkElementLoaded, 500);
    }
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址