Paper Clip (Save HTML)

Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S

目前为 2023-05-10 提交的版本。查看 最新版本

// ==UserScript== 
// @name        Paper Clip (Save HTML)
// @description Save plain HTML of selection; optimized for printing. Hotkey: Command + Shift + S
// @author      Schimon Jehudah, Adv.
// @namespace   i2p.schimon.paperclip
// @homepageURL https://gf.qytechs.cn/en/scripts/465960-paper-clip-save-html
// @supportURL  https://gf.qytechs.cn/en/scripts/465960-paper-clip-save-html/feedback
// @copyright   2023, Schimon Jehudah (http://schimon.i2p)
// @license     MIT; https://opensource.org/licenses/MIT
// @exclude     devtools://*
// @include     *
// @version     23.05.09
// @run-at      document-end
// @icon        
// ==/UserScript==

/* TODO

1) Bookmarklet

2) jsPDF /parallax/jsPDF

*/

// Check whether HTML; otherwise, exit.
// The script stops here for documents without a doctype.
// NOTE(review): a top-level "return" presumably relies on the userscript
// manager wrapping the script in a function — confirm before running this
// file as a plain script or module.
//if (!document.contentType == 'text/html')
if (document.doctype == null) return;

// Inline style values of the highlighted element. drawBorder() stores them
// and resetStyle() writes them back.
// NOTE(review): originalDisplay is declared but not used in the code visible
// here.
var
  originalBackground, originalColor,
  originalDisplay, originalOutline;

// Timestamp taken once when the script starts; createPage() writes it to the
// "date" meta tag and createFilename() builds the filename from it.
const time = new Date();
// Used as tag name and id of the floating button, and as the value of the
// "creator" meta tag.
const namespace = 'org.openuserjs.sjehuda.paperclip';

// Hotkey: Command + Shift + S saves the current selection as HTML.
// FIXME set hotkey
// The handler is registered with addEventListener instead of being assigned
// to document.onkeyup: the property holds a single handler, so assigning it
// would discard a handler set by the page, and the page could discard this one.
document.addEventListener('keyup', function(e) {
  //if (e.ctrlKey && e.shiftKey && e.which == 49) { // Ctrl + Shift + 1
  if (e.metaKey && e.shiftKey && e.which === 83) { // Command + Shift + S
    console.info('Saving selection to HTML.');
    createPage();
  }
});

// Show or hide the floating button after each click.
// Notes on the choice of event:
// - "click" and "mouseup" are the most sensible, albeit not accurate
// - "mousemove" is the most manipulative (per user), yet (almost) the most accurate
// - "select" seems to work only inside input elements
window.addEventListener('click', (event) => {
//document.addEventListener('click', (event) => {
  const selection = document.getSelection();
  const existing = document.getElementById(namespace);
  const hasSelectedText = () => selection.toString().length > 0;

  if (!existing) {
    // No button yet: add one next to the pointer when text is selected.
    if (hasSelectedText()) {
      document.body.append(createButton(event.pageX, event.pageY));
    }
    return;
  }

  // A button is present: drop it once the selection is empty.
  if (!hasSelectedText()) {
    existing.remove();
  }
}, {passive: true});

// Remove the floating button as soon as the pointer moves while nothing is
// selected.
// TODO declare variables once
// NOTE consider "mousedown"
// NOTE consider moving this functionality into function createButton()
window.addEventListener('mousemove', function () {
  const selection = document.getSelection();
  const button = document.getElementById(namespace);
  if (!button) {
    return;
  }
  if (selection.toString().length === 0) {
    button.remove();
  }
});

// Build the floating paper-clip button.
// x, y: page coordinates; the button is placed 5px to the right of x, at y.
// Returns the new element; the caller is responsible for inserting it.
function createButton(x, y) {
  const button = document.createElement(namespace);
  button.id = namespace;
  button.textContent = '📎'; // 🖇️ 💾

  Object.assign(button.style, {
    // position
    position: 'absolute',
    left: `${x + 5}px`,
    top: `${y}px`,
    // appearance
    fontFamily: 'none', // emoji
    background: 'repeating-linear-gradient(45deg, black, transparent 100px)', // black, cornflowerblue, grey, rosybrown
    border: 'ridge',
    borderColor: 'rosybrown',
    borderRadius: '50%',
    padding: '3px',
    minWidth: '30px',
    minHeight: '30px',
    fontSize: '20px',
    zIndex: 10000,
    opacity: 0.7,
    // center the character
    justifyContent: 'center',
    alignItems: 'center',
    display: 'flex',
    // disable selection marks
    outline: 'white', // none
    userSelect: 'none',
    cursor: 'default',
  });

  // Hovering highlights the element that would be saved.
  button.onmouseover = () => {
    drawBorder();
    button.style.opacity = 1;
  };
  button.onmouseleave = () => { // onmouseout
    resetStyle();
    button.style.opacity = 0.7;
  };

  // Clicking restores the page style first, then saves the selection.
  button.onclick = () => {
    resetStyle();
    createPage();
  };

  return button;
}

// Highlight the element returned by getSelectedText().
// Its current inline color, outline and background are stored in the shared
// original* variables so that resetStyle() can put them back.
function drawBorder() {
  const { style } = getSelectedText();

  originalColor = style.color;
  originalOutline = style.outline;
  originalBackground = style.background;

  // An outline is used so the highlight does not affect layout or spacing.
  // https://overflow.adminforge.de/questions/29990319/draw-border-around-input-without-affecting-style-layout-or-spacing
  Object.assign(style, {
    outline: '2px double rosybrown',
    background: 'rgb(250 250 210)',
    color: 'black', // DarkRed
  });
}

// Undo drawBorder(): write the stored inline color, outline and background
// back to the element returned by getSelectedText().
// TODO remove attribute 'style' of first element after 'body'
// FIXME
// http://gothicrichard.synthasite.com/what-i-fond-on-the-net.php
// https://darknetdiaries.com/episode/65/
function resetStyle() {
  Object.assign(getSelectedText().style, {
    color: originalColor,
    outline: originalOutline,
    background: originalBackground,
  });
}

// Build a standalone HTML document from the current selection and hand it to
// savePage() for download.
//
// Steps:
// 1. create an empty HTML document, detached from the live page;
// 2. copy the title, add a <base>, and add <meta> tags describing the capture;
// 3. copy the outerHTML of the element returned by getSelectedText() into
//    <body> and run the clean-up functions over it;
// 4. serialize the document, collapse whitespace and save it under the name
//    returned by createFilename().
//
// Takes no parameters and returns nothing.
function createPage() {

  const domParser = new DOMParser();
  let data = domParser.parseFromString('<!DOCTYPE html>', 'text/html');

  // set title
  if (document.title.length > 0) {
    data.title = document.title;
  }

  // set base
  // Declared locally: assigning to an undeclared "base" creates a global in
  // sloppy mode and throws a ReferenceError in strict mode.
  const base = data.createElement('base');
  base.href = data.head.baseURI; // location.href;
  data.head.append(base);

  // Metadata describing the capture itself, as [name, content] pairs.
  // Not exported yet: connection-type (navigator.connection.effectiveType),
  // character-count, word-count.
  const captureMeta = [
    ['url', location.href],
    ['date', time],
    ['creator', namespace],
    ['user-agent', navigator.userAgent],
    ['content-type-sourced', document.contentType],
    ['charset-sourced', document.charset],
  ];

  for (const [name, content] of captureMeta) {
    // Elements are created in the document they are appended to.
    const meta = data.createElement('meta');
    meta.name = name;
    meta.content = content;
    data.head.append(meta);
  }

  // Meta tags copied from the source page; each is saved with the suffix
  // "-imported". ('content-type' is not imported.)
  const importedMeta = [
    'viewport',
    'description',
    'keywords',
    'generator',
  ];

  for (const name of importedMeta) {
    // The "i" flag makes the attribute value match case-insensitively.
    const source = document.querySelector('meta[name="' + name + '" i]');
    if (!source) {
      console.warn(name + ': Not found.');
      continue;
    }
    const meta = data.createElement('meta');
    meta.name = name + '-imported';
    meta.content = source.content;
    data.head.append(meta);
  }

  // The order matters: media URLs are recorded before attributes and media
  // elements are removed.
  data.body.innerHTML = getSelectedText().outerHTML;
  data = listMediaElements(data);
  data = removeAttributes(data);
  data = removeMediaElements(data);
//data = replaceMediaByLinks(data);
  data = correctLinks(data);
  data = removeEmptyElements(data);
  data = removeCommentNodes(data);

  let html = new XMLSerializer().serializeToString(data);
//html = formatPage(html);
//html = minify(html);
//html = removeComments(html);
  html = removeMultipleWhiteSpace(html);
  savePage(html, createFilename());

}

// Replace every <img> in the document by an <a> that links to the image.
// data: the document to modify in place.
// Returns the same document.
function replaceMediaByLinks(data) {
  for (const imgElement of data.querySelectorAll('img')) {
    // Create a new <a> element
    const aElement = data.createElement('a');

    // Copy the attributes of the <img> element to the new <a> element
    for (const { name, value } of Array.from(imgElement.attributes)) {
      aElement.setAttribute(name, value);
    }

    // Set the link target with a real setAttribute() call. Assigning to
    // "aElement.setAttribute.href" only adds a property to the method and
    // leaves the link without a target. Done last so that a copied attribute
    // cannot override it.
    aElement.setAttribute('href', imgElement.src);
    aElement.textContent = imgElement.src;

    // Replace the <img> element with the new <a> element
    imgElement.parentNode.replaceChild(aElement, imgElement);
  }
  return data;
}

// Record the URLs of media elements as <meta> tags in <head>.
// For every audio, embed, img, video, frame, frameset and iframe element, each
// non-empty "src" or "data-img-url" attribute is added as
// <meta name="extracted-media-TAG" content="URL">.
// data: the document to inspect and extend in place.
// Returns the same document.
function listMediaElements(data) {

  const elements = [
    'audio', 'embed', 'img', 'video',
    'frame', 'frameset', 'iframe',
  ];
  const attributes = ['src', 'data-img-url'];

  for (const tagName of elements) {
    for (const element of data.querySelectorAll(tagName)) {
      for (const attribute of attributes) {
        const value = element.getAttribute(attribute);
        if (!value) {
          continue;
        }
        // Declared locally: assigning to an undeclared "meta" creates a global
        // in sloppy mode and throws a ReferenceError in strict mode.
        const meta = data.createElement('meta');
        meta.name = `extracted-media-${tagName}`;
        meta.content = value;
        data.head.append(meta);
      }
    }
  }
  return data;
}

// Remove graphics, media, form controls, scripts and styles from the document.
// data: the document to modify in place.
// Returns the same document.
//
// TODO Remove span and preserve its contents
// Move span content to its parent element/node
// https://overflow.lunar.icu/questions/9848465/js-remove-a-tag-without-deleting-content
// TODO Replace "iframe" by "a href"
function removeMediaElements(data) {
  const unwantedTags = [
    'audio', 'embed', 'img', 'video', 'button',
    'form', 'frame', 'frameset', 'iframe', 'textarea',
    'svg', 'input', 'path',
    'script', 'style',
    'select',
  ];

  // One query per tag name, in list order.
  unwantedTags.forEach((tagName) => {
    data.querySelectorAll(tagName).forEach((node) => node.remove());
  });

  return data;
}

// Remove all attributes except "href", "name" and "id" from every element
// inside <body>.
// https://stackoverflow.com/questions/1870441/remove-all-attributes
// data: the document to modify in place.
// Returns the same document.
function removeAttributes(data) {
  const keptAttributes = ['href', 'name', 'id'];

  for (const element of data.querySelectorAll('body *')) {
    // element.attributes is a live collection: removing an entry shifts the
    // following ones down, so an index loop skips the attribute after every
    // removal. Collect the names first, then remove.
    const names = Array.from(element.attributes, (attribute) => attribute.name);
    for (const name of names) {
      if (!keptAttributes.includes(name)) {
        element.removeAttribute(name);
      }
    }
  }

  return data;
}

// Correct links for offline usage: a link that points to a fragment of the
// page itself is rewritten to the bare fragment ("#section").
// data: the document to modify in place.
// Returns the same document.
function correctLinks(data) {
  for (const element of data.querySelectorAll('a')) {
    //if (element.hash) {
    //if (element.hostname + element.pathname == location.hostname + location.pathname) {
    // The base URI may itself end in a fragment (e.g. when the page was opened
    // as ".../page#top"). Drop it before comparing, otherwise the prefix
    // ".../page#top#" never matches and no link is corrected.
    const pageURL = element.baseURI.split('#')[0];
    if (element.href.startsWith(pageURL + '#')) {
      element.href = element.hash;
    }
  }
  return data;
}

// Remove every element inside <body> whose outerText is empty or consists of
// whitespace only.
// data: the document to modify in place.
// Returns the same document.
function removeEmptyElements (data) {
  const blankText = /^\s*$/;
  data.body.querySelectorAll('*').forEach((node) => {
    if (blankText.test(node.outerText)) {
      node.remove();
    }
  });
  return data;
}

// Remove all comment nodes from the document, logging the node name of each
// one that is removed.
// data: the document to modify in place.
// Returns the same document.
function removeCommentNodes(data) {
  // Walk every node type, starting at the document itself.
  const walker = data.createNodeIterator(data, NodeFilter.SHOW_ALL, null, false);

  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    if (node.nodeName !== '#comment') {
      continue;
    }
    node.remove();
    console.log(node.nodeName);
  }

  return data;
}

// Strip HTML comments ("<!-- ... -->", including multi-line ones) from a string.
// str: HTML source.
// Returns the string without comments.
function removeComments(str) {
  const commentPattern = /<!--[\s\S]*?-->/g;
  return str.replace(commentPattern, '');
}

// Collapse every run of whitespace into a single space, except inside
// <code>, <pre> and <code-*> blocks, which are returned untouched.
// str: HTML source.
// Returns the normalized string.
function removeMultipleWhiteSpace(str) {
  // Group 1 captures a complete code block; group 3 a whitespace run.
  const blockOrWhitespace = /(<(code|pre|code-[^\s]+)[^>]*>.*?<\/\2>)|(\s+)/gs;
  return str.replace(blockOrWhitespace, (whole, codeBlock) => (codeBlock ? codeBlock : " "));
}

// Return the first common ancestor of the parents of the nodes where the
// current selection begins (anchor) and ends (focus).
// https://stackoverflow.com/questions/32515175/get-parent-element-of-beginning-and-end-of-selected-text
function getSelectedText() {
  const { anchorNode, focusNode } = document.getSelection();
  return findFirstCommonAncestor(anchorNode.parentNode, focusNode.parentNode);
}

// Find the first common ancestor of two DOM nodes.
// https://stackoverflow.com/questions/2453742/whats-the-best-way-to-find-the-first-common-parent-of-two-dom-nodes-in-javascri
// nodeA, nodeB: the two nodes, in any document order.
// Returns the commonAncestorContainer of a range spanning both nodes.
function findFirstCommonAncestor(nodeA, nodeB) {
  const span = new Range();
  span.setStart(nodeA, 0);
  span.setEnd(nodeB, 0);

  // If nodeA comes after nodeB in the document, the range above is collapsed
  // (start and end at the same position) and its commonAncestorContainer
  // would likely just be nodeB's parent. Spanning the nodes in the opposite
  // order fixes that.
  if (span.collapsed) {
    span.setStart(nodeB, 0);
    span.setEnd(nodeA, 0);
  }

  return span.commonAncestorContainer;
}

// Minify HTML.
// /questions/23284784/javascript-minify-html-regex
// TODO Don't apply on code/pre
// s: HTML source; a falsy value yields an empty string.
// Returns the minified, trimmed string.
function minify( s ){
  if (!s) {
    return "";
  }
  // Remove new lines and spaces between adjacent tags; they might affect
  // layout and are better gone.
  const tightened = s.replace(/\>[\r\n ]+\</g, "><");
  // Keep tags as they are; collapse every other whitespace run to one space.
  const collapsed = tightened.replace(/(<.*?>)|\s+/g, (whole, tag) => (tag ? tag : ' '));
  return collapsed.trim();
}

// Format HTML: one tag boundary per line, indented with tabs, lines ending
// in CRLF.
// /questions/3913355/how-to-format-tidy-beautify-in-javascript
// TODO Don't inset span in code/pre
// html: HTML source.
// Returns the formatted string.
function formatPage(html) {
  const tab = '\t';
  let indent = '';
  let output = '';

  // Splitting at "><" (with optional whitespace in between) strips those
  // brackets from each piece; they are added back below.
  for (const piece of html.split(/>\s*</)) {
    // A closing tag steps back one level before it is written.
    if (/^\/\w/.test(piece)) {
      indent = indent.substring(tab.length);
    }

    output += `${indent}<${piece}>\r\n`;

    // A piece that is only an opening tag (not self-closed, not "input")
    // indents what follows.
    const opensLevel = /^<?\w[^>]*[^\/]$/.test(piece) && !piece.startsWith("input");
    if (opensLevel) {
      indent += tab;
    }
  }

  // Drop the extra "<" added at the start and the extra ">\r\n" at the end.
  return output.substring(1, output.length - 3);
}

// Build the filename for the saved page:
//   TITLE_YYYY-MM-DD_HH-MM-SS.html, lower-cased.
// TITLE is document.title, or the last segment of location.pathname when the
// title is empty; spaces are replaced by underscores.
// The timestamp comes from the shared "time" value.
// Returns the filename as a string.
function createFilename() {

  const pad = (value) => String(value).padStart(2, '0');

  // Date and time are both taken in local time. Taking the date from
  // toISOString() (UTC) while the time comes from getHours() etc. (local)
  // puts the stamp a day off around midnight outside UTC.
  const day = [
    time.getFullYear(),
    pad(time.getMonth() + 1), // getMonth() is 0-based
    pad(time.getDate()),
  ].join('-');

  const now = [
    time.getHours(),
    time.getMinutes(),
    time.getSeconds(),
  ].map(pad).join('-');

  let title;
  if (document.title) {
    title = document.title;
  } else {
    const segments = location.pathname.split('/');
    title = segments[segments.length - 1];
  }

  // TODO replace all whitespace (not only spaces) by underscore
  title = title.replace(/ /g, '_');

  const filename = `${title}_${day}_${now}.html`;

  return filename.toLowerCase();

}

// Export a file: offer a string as a "text/html" download.
// https://stackoverflow.com/questions/4545311/download-a-file-by-jquery-ajax
// https://stackoverflow.com/questions/43135852/javascript-export-to-text-file
// savePage(data, fileName)
//   data: content of the file.
//   fileName: name suggested for the download.
// A single <a> element is created once and reused for every call; it is not
// attached to the page.
var savePage = (function () {
  const anchor = document.createElement("a");

  return function (data, fileName) {
    const blob = new Blob([data], {type: "text/html"});
    const objectURL = window.URL.createObjectURL(blob);

    anchor.href = objectURL;
    anchor.download = fileName;
    anchor.click();

    // Release the object URL right after the click.
    window.URL.revokeObjectURL(objectURL);
  };
}());

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址