Paste html: Clean html from Google Docs (#2476)
This commit is contained in:
parent
b9430f0bda
commit
b1f92f4126
@ -5,9 +5,10 @@ import type { ApiAttachment, ApiFormattedText, ApiMessage } from '../../../../ap
|
||||
import { ApiMessageEntityTypes } from '../../../../api/types';
|
||||
|
||||
import buildAttachment from '../helpers/buildAttachment';
|
||||
import { EDITABLE_INPUT_ID, EDITABLE_INPUT_MODAL_ID } from '../../../../config';
|
||||
import { DEBUG, EDITABLE_INPUT_ID, EDITABLE_INPUT_MODAL_ID } from '../../../../config';
|
||||
import getFilesFromDataTransferItems from '../helpers/getFilesFromDataTransferItems';
|
||||
import parseMessageInput, { ENTITY_CLASS_BY_NODE_NAME } from '../../../../util/parseMessageInput';
|
||||
import cleanDocsHtml from '../../../../lib/cleanDocsHtml';
|
||||
import { containsCustomEmoji, stripCustomEmoji } from '../../../../global/helpers/symbols';
|
||||
|
||||
const MAX_MESSAGE_LENGTH = 4096;
|
||||
@ -16,6 +17,14 @@ const STYLE_TAG_REGEX = /<style>(.*?)<\/style>/gs;
|
||||
|
||||
function preparePastedHtml(html: string) {
|
||||
let fragment = document.createElement('div');
|
||||
try {
|
||||
html = cleanDocsHtml(html);
|
||||
} catch (err) {
|
||||
if (DEBUG) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
}
|
||||
}
|
||||
fragment.innerHTML = html.replace(/\u00a0/g, ' ').replace(STYLE_TAG_REGEX, ''); // Strip   and styles
|
||||
|
||||
const textContents = fragment.querySelectorAll<HTMLDivElement>('.text-content');
|
||||
|
||||
183
src/lib/cleanDocsHtml.ts
Normal file
183
src/lib/cleanDocsHtml.ts
Normal file
@ -0,0 +1,183 @@
|
||||
// Utility for cleaning html code from Google Docs.
|
||||
// Original source from DocsSoap:
|
||||
// https://www.npmjs.com/package/docs-soap
|
||||
|
||||
// Case-insensitive pattern used to detect clipboard HTML that carries
// an `id="docs-internal-guid…` attribute.
const GDOCS_ELEMENT_ID_REGEXP = /id="docs-internal-guid/i;

// Inline CSS values that are compared against element styles
// to recognize each kind of text formatting.
const GDOCS_STYLES = {
  BOLD: '700', // compared with `font-weight`
  ITALIC: 'italic', // compared with `font-style`
  UNDERLINE: 'underline', // compared with `text-decoration`
  STRIKETHROUGH: 'line-through', // compared with `text-decoration`
  SUPERSCRIPT: 'super', // compared with `vertical-align`
  SUBSCRIPT: 'sub', // compared with `vertical-align`
};

// Tag names of the elements created to replace the style-based formatting.
const ELEMENTS = {
  ANCHOR: 'a',
  BOLD: 'strong',
  ITALIC: 'em',
  UNDERLINE: 'u',
  STRIKETHROUGH: 'del',
  SUPERSCRIPT: 'sup',
  SUBSCRIPT: 'sub',
};

// Upper-case node names (as reported by `Node.nodeName`) that are treated as headings.
const headers = ['H1', 'H2', 'H3', 'H4', 'H5', 'H6'];
|
||||
|
||||
/**
 * Parses an HTML string with `DOMParser` (as `text/html`)
 * and returns the `body` of the resulting document.
 */
function parseHtml(html: string): HTMLElement {
  return new DOMParser().parseFromString(html, 'text/html').body;
}
|
||||
|
||||
/**
 * Builds a new anchor element pointing to `href` that contains a deep copy of `node`.
 * The passed node itself is left untouched.
 */
function wrapNodeAnchor(node: Node, href: string): HTMLAnchorElement {
  const link = document.createElement(ELEMENTS.ANCHOR) as HTMLAnchorElement;
  link.href = href;

  const copy = node.cloneNode(true);
  link.appendChild(copy);

  return link;
}
|
||||
|
||||
/**
 * Creates an element with the tag name given in `style` (e.g. `strong`, `em`)
 * and puts a deep copy of `node` inside it.
 */
function wrapNodeInline(node: Node, style: string): Node {
  const wrapper = document.createElement(style);
  const copy = node.cloneNode(true);
  wrapper.appendChild(copy);

  return wrapper;
}
|
||||
|
||||
/**
 * Wraps a deep copy of `result` into formatting elements according to the inline
 * styles of `inner`.
 *
 * Wrappers are applied in a fixed order (bold, italic, underline, strikethrough,
 * superscript, subscript), each new wrapper enclosing the previous result.
 *
 * @param inner Node whose `style` is inspected. May be missing or have no `style`
 * (e.g. a text node passed through a cast); then no wrapper is applied.
 * @param result Node to wrap; it is cloned and never modified.
 * @returns The (possibly wrapped) copy of `result`.
 */
function wrapNode(inner: HTMLElement, result: Node): Node {
  let newNode = result.cloneNode(true);

  const style = inner && inner.style;
  if (!style) {
    return newNode;
  }

  // `text-decoration` may list several keywords at once (e.g. `underline line-through`),
  // so each keyword is looked up as a separate token instead of comparing the whole value.
  const decorations = (style.textDecoration || '').split(/\s+/);

  const wrappers: Array<[boolean, string]> = [
    [style.fontWeight === GDOCS_STYLES.BOLD, ELEMENTS.BOLD],
    [style.fontStyle === GDOCS_STYLES.ITALIC, ELEMENTS.ITALIC],
    [decorations.indexOf(GDOCS_STYLES.UNDERLINE) !== -1, ELEMENTS.UNDERLINE],
    [decorations.indexOf(GDOCS_STYLES.STRIKETHROUGH) !== -1, ELEMENTS.STRIKETHROUGH],
    [style.verticalAlign === GDOCS_STYLES.SUPERSCRIPT, ELEMENTS.SUPERSCRIPT],
    [style.verticalAlign === GDOCS_STYLES.SUBSCRIPT, ELEMENTS.SUBSCRIPT],
  ];

  for (const [isApplied, tagName] of wrappers) {
    if (isApplied) {
      newNode = wrapNodeInline(newNode, tagName);
    }
  }

  return newNode;
}
|
||||
|
||||
/**
 * Produces a simplified replacement for the content of a block node (a paragraph).
 *
 * The whole text content of the block becomes a single text node, which is then:
 * - wrapped into an anchor when the first child of the block is an `A` element;
 * - wrapped into formatting elements based on the styles of the first child
 *   (or of the first child of that anchor).
 *
 * @param dirty Block node to process; it is cloned and never modified.
 * @returns A new node holding the text with links and formatting re-applied.
 */
function applyBlockStyles(dirty: Node): Node {
  const node = dirty.cloneNode(true);
  // An empty block has no children at all, so the first child must be checked before use:
  // applying the `in` operator to `undefined` throws a TypeError.
  const firstChild = node.childNodes[0] as ChildNode | undefined;

  let newNode: Node = document.createTextNode(node.textContent || '');
  // An empty text node has no `style`, which results in no formatting being applied.
  let styledNode: Node | undefined = document.createTextNode('');

  if (firstChild && 'style' in firstChild && Boolean(firstChild.style)) {
    styledNode = firstChild;
  }

  if (firstChild && firstChild.nodeName === 'A') {
    newNode = wrapNodeAnchor(newNode.cloneNode(true), (firstChild as HTMLAnchorElement).href);
    // May be `undefined` for an anchor without children; `wrapNode` handles a missing node.
    styledNode = firstChild.childNodes[0];
  }

  return wrapNode(styledNode as HTMLElement, newNode);
}
|
||||
|
||||
/**
 * Produces a simplified replacement for an inline node.
 *
 * The text content of the node becomes a single text node, which is wrapped into
 * formatting elements based on the node's own styles. When the node is an `A` element,
 * the text is additionally wrapped into an anchor with the same `href`, and the styles
 * are taken from the anchor's first child if that child has a `style`.
 *
 * @param dirty Inline node to process; it is cloned and never modified.
 * @returns A new node holding the text with the link and formatting re-applied.
 */
function applyInlineStyles(dirty: Node): Node {
  const node = dirty.cloneNode(true);

  let newNode: Node = document.createTextNode(node.textContent || '');
  let styledNode: Node = node;

  if (node.nodeName === 'A') {
    newNode = wrapNodeAnchor(newNode, (node as HTMLAnchorElement).href);

    // An anchor may have no children; applying the `in` operator to `undefined`
    // throws a TypeError, so the first child is checked before use.
    const firstChild = node.childNodes[0] as ChildNode | undefined;
    if (firstChild && 'style' in firstChild && Boolean(firstChild.style)) {
      styledNode = firstChild;
    }
  }

  return wrapNode(styledNode as HTMLElement, newNode);
}
|
||||
|
||||
/**
 * Recursively converts a node into a list of simplified nodes.
 *
 * Nodes with at most one child, as well as `OL`/`UL` lists, are rebuilt:
 * - `UL`, `OL` and `LI` get a new element of the same name filled with their cleaned children;
 * - headings get a new element of the same name with the cleaned first child inside;
 * - `P` gets a new `p` element with the result of `applyBlockStyles`;
 * - `BR` is returned as a deep copy;
 * - anything else is put into a `span` with the result of `applyInlineStyles`.
 *
 * Any other node is replaced by the flattened list of its cleaned children.
 *
 * @param node Node to clean.
 * @returns Cleaned nodes that replace `node`.
 */
function getCleanNode(node: Node): Node[] {
  const { nodeName, childNodes } = node;
  const isList = nodeName === 'OL' || nodeName === 'UL';

  if (childNodes && (childNodes.length <= 1 || isList)) {
    let newWrapper: Node | undefined;
    let newNode: Node = document.createTextNode(node.textContent || '');

    if (isList || nodeName === 'LI') {
      newWrapper = document.createElement(nodeName);

      const items: Node[] = [];
      for (let i = 0; i < childNodes.length; i++) {
        items.push(...getCleanNode(childNodes[i]));
      }

      const fragment = document.createDocumentFragment();
      items.forEach((item) => {
        fragment.appendChild(item);
      });
      newNode = fragment;
    } else if (headers.indexOf(nodeName) !== -1) {
      newWrapper = document.createElement(nodeName);

      // A heading may be empty; without this check `undefined` would be passed
      // to `applyInlineStyles`, which fails when it tries to clone its argument.
      const firstChild = childNodes[0] as ChildNode | undefined;
      newNode = firstChild ? applyInlineStyles(firstChild) : document.createTextNode('');
    } else if (nodeName === 'P') {
      newWrapper = document.createElement('p');
      newNode = applyBlockStyles(node);
    } else if (nodeName !== 'BR') {
      newWrapper = document.createElement('span');
      newNode = applyInlineStyles(node);
    }

    if (newWrapper) {
      newWrapper.appendChild(newNode);
      return [newWrapper];
    }

    // Only `BR` gets here: it needs no wrapper and is copied as is.
    return [node.cloneNode(true)];
  }

  if (childNodes) {
    const nodes: Node[] = [];
    for (let i = 0; i < childNodes.length; i++) {
      nodes.push(...getCleanNode(childNodes[i]));
    }
    return nodes;
  }

  return [node];
}
|
||||
|
||||
/**
 * Predicate for filtering node lists: returns `false` for comment nodes
 * and `true` for every other node.
 */
function filterNode(node: Node): boolean {
  const COMMENT_NODE_TYPE = 8; // Same value as `Node.COMMENT_NODE`
  const isComment = node.nodeType === COMMENT_NODE_TYPE;

  return !isComment;
}
|
||||
|
||||
/**
 * Builds a new `body` element from the top-level children of `dirty`:
 * comment nodes are dropped, every remaining child is passed through `getCleanNode`,
 * and deep copies of the resulting nodes are appended in order.
 */
function getCleanDocument(dirty: HTMLElement): HTMLElement {
  const cleanBody = document.createElement('body');
  const cleaned: Node[] = [];

  // `Array.from` takes a snapshot of the child list before any processing starts
  Array.from(dirty.childNodes)
    .filter(filterNode)
    .forEach((child) => {
      cleaned.push(...getCleanNode(child));
    });

  cleaned.forEach((cleanNode) => {
    cleanBody.appendChild(cleanNode.cloneNode(true));
  });

  return cleanBody;
}
|
||||
|
||||
/**
 * Cleans HTML copied to the clipboard.
 *
 * The input is always parsed and serialized back via `innerHTML` of the parsed body.
 * When it contains the Google Docs marker (`GDOCS_ELEMENT_ID_REGEXP`), the parsed body
 * is additionally rebuilt by `getCleanDocument` before serialization.
 *
 * @param clipboardContent HTML string taken from the clipboard.
 * @returns The resulting HTML string.
 */
export default function cleanDocsHtml(clipboardContent: string): string {
  const isFromGoogleDocs = GDOCS_ELEMENT_ID_REGEXP.test(clipboardContent);

  // NOTE(review): the pattern has no `g` flag, so only the first line break is removed.
  // Confirm whether removing every line break was intended.
  const parsedBody = parseHtml(clipboardContent.replace(/(\r\n|\n|\r)/, ''));
  const resultBody = isFromGoogleDocs ? getCleanDocument(parsedBody) : parsedBody;

  return resultBody.innerHTML;
}
|
||||
@ -8,6 +8,7 @@ export const ENTITY_CLASS_BY_NODE_NAME: Record<string, ApiMessageEntityTypes> =
|
||||
STRONG: ApiMessageEntityTypes.Bold,
|
||||
I: ApiMessageEntityTypes.Italic,
|
||||
EM: ApiMessageEntityTypes.Italic,
|
||||
INS: ApiMessageEntityTypes.Underline,
|
||||
U: ApiMessageEntityTypes.Underline,
|
||||
S: ApiMessageEntityTypes.Strike,
|
||||
STRIKE: ApiMessageEntityTypes.Strike,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user