Paste html: Clean html from Google Docs (#2476)
This commit is contained in:
parent
b9430f0bda
commit
b1f92f4126
@ -5,9 +5,10 @@ import type { ApiAttachment, ApiFormattedText, ApiMessage } from '../../../../ap
|
|||||||
import { ApiMessageEntityTypes } from '../../../../api/types';
|
import { ApiMessageEntityTypes } from '../../../../api/types';
|
||||||
|
|
||||||
import buildAttachment from '../helpers/buildAttachment';
|
import buildAttachment from '../helpers/buildAttachment';
|
||||||
import { EDITABLE_INPUT_ID, EDITABLE_INPUT_MODAL_ID } from '../../../../config';
|
import { DEBUG, EDITABLE_INPUT_ID, EDITABLE_INPUT_MODAL_ID } from '../../../../config';
|
||||||
import getFilesFromDataTransferItems from '../helpers/getFilesFromDataTransferItems';
|
import getFilesFromDataTransferItems from '../helpers/getFilesFromDataTransferItems';
|
||||||
import parseMessageInput, { ENTITY_CLASS_BY_NODE_NAME } from '../../../../util/parseMessageInput';
|
import parseMessageInput, { ENTITY_CLASS_BY_NODE_NAME } from '../../../../util/parseMessageInput';
|
||||||
|
import cleanDocsHtml from '../../../../lib/cleanDocsHtml';
|
||||||
import { containsCustomEmoji, stripCustomEmoji } from '../../../../global/helpers/symbols';
|
import { containsCustomEmoji, stripCustomEmoji } from '../../../../global/helpers/symbols';
|
||||||
|
|
||||||
const MAX_MESSAGE_LENGTH = 4096;
|
const MAX_MESSAGE_LENGTH = 4096;
|
||||||
@ -16,6 +17,14 @@ const STYLE_TAG_REGEX = /<style>(.*?)<\/style>/gs;
|
|||||||
|
|
||||||
function preparePastedHtml(html: string) {
|
function preparePastedHtml(html: string) {
|
||||||
let fragment = document.createElement('div');
|
let fragment = document.createElement('div');
|
||||||
|
try {
|
||||||
|
html = cleanDocsHtml(html);
|
||||||
|
} catch (err) {
|
||||||
|
if (DEBUG) {
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.error(err);
|
||||||
|
}
|
||||||
|
}
|
||||||
fragment.innerHTML = html.replace(/\u00a0/g, ' ').replace(STYLE_TAG_REGEX, ''); // Strip   and styles
|
fragment.innerHTML = html.replace(/\u00a0/g, ' ').replace(STYLE_TAG_REGEX, ''); // Strip   and styles
|
||||||
|
|
||||||
const textContents = fragment.querySelectorAll<HTMLDivElement>('.text-content');
|
const textContents = fragment.querySelectorAll<HTMLDivElement>('.text-content');
|
||||||
|
|||||||
183
src/lib/cleanDocsHtml.ts
Normal file
183
src/lib/cleanDocsHtml.ts
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
// Utility for cleaning html code from Google Docs.
|
||||||
|
// Original source from DocsSoap:
|
||||||
|
// https://www.npmjs.com/package/docs-soap
|
||||||
|
|
||||||
|
const GDOCS_ELEMENT_ID_REGEXP = /id="docs-internal-guid/i;
|
||||||
|
|
||||||
|
const GDOCS_STYLES = {
|
||||||
|
BOLD: '700',
|
||||||
|
ITALIC: 'italic',
|
||||||
|
UNDERLINE: 'underline',
|
||||||
|
STRIKETHROUGH: 'line-through',
|
||||||
|
SUPERSCRIPT: 'super',
|
||||||
|
SUBSCRIPT: 'sub',
|
||||||
|
};
|
||||||
|
|
||||||
|
const ELEMENTS = {
|
||||||
|
ANCHOR: 'a',
|
||||||
|
BOLD: 'strong',
|
||||||
|
ITALIC: 'em',
|
||||||
|
UNDERLINE: 'u',
|
||||||
|
STRIKETHROUGH: 'del',
|
||||||
|
SUPERSCRIPT: 'sup',
|
||||||
|
SUBSCRIPT: 'sub',
|
||||||
|
};
|
||||||
|
|
||||||
|
const headers = [
|
||||||
|
'H1',
|
||||||
|
'H2',
|
||||||
|
'H3',
|
||||||
|
'H4',
|
||||||
|
'H5',
|
||||||
|
'H6',
|
||||||
|
];
|
||||||
|
|
||||||
|
function parseHtml(html: string): HTMLElement {
|
||||||
|
const parser = new DOMParser();
|
||||||
|
const doc = parser.parseFromString(html, 'text/html');
|
||||||
|
|
||||||
|
return doc.body;
|
||||||
|
}
|
||||||
|
|
||||||
|
function wrapNodeAnchor(node: Node, href: string): HTMLAnchorElement {
|
||||||
|
const anchor = document.createElement(ELEMENTS.ANCHOR) as HTMLAnchorElement;
|
||||||
|
anchor.href = href;
|
||||||
|
anchor.appendChild(node.cloneNode(true));
|
||||||
|
|
||||||
|
return anchor;
|
||||||
|
}
|
||||||
|
|
||||||
|
function wrapNodeInline(node: Node, style: string): Node {
|
||||||
|
const el = document.createElement(style);
|
||||||
|
el.appendChild(node.cloneNode(true));
|
||||||
|
|
||||||
|
return el;
|
||||||
|
}
|
||||||
|
|
||||||
|
function wrapNode(inner: HTMLElement, result: Node): Node {
|
||||||
|
let newNode = result.cloneNode(true);
|
||||||
|
if (!inner) {
|
||||||
|
return newNode;
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.fontWeight === GDOCS_STYLES.BOLD) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.BOLD);
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.fontStyle === GDOCS_STYLES.ITALIC) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.ITALIC);
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.textDecoration === GDOCS_STYLES.UNDERLINE) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.UNDERLINE);
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.textDecoration === GDOCS_STYLES.STRIKETHROUGH) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.STRIKETHROUGH);
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.verticalAlign === GDOCS_STYLES.SUPERSCRIPT) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.SUPERSCRIPT);
|
||||||
|
}
|
||||||
|
if (inner.style && inner.style.verticalAlign === GDOCS_STYLES.SUBSCRIPT) {
|
||||||
|
newNode = wrapNodeInline(newNode, ELEMENTS.SUBSCRIPT);
|
||||||
|
}
|
||||||
|
|
||||||
|
return newNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyBlockStyles(dirty: Node): Node {
|
||||||
|
const node = dirty.cloneNode(true);
|
||||||
|
let newNode = document.createTextNode(node.textContent || '') as Node;
|
||||||
|
let styledNode = document.createTextNode('') as Node;
|
||||||
|
if ('style' in node.childNodes[0] && Boolean(node.childNodes[0].style)) {
|
||||||
|
styledNode = node.childNodes[0];
|
||||||
|
}
|
||||||
|
if (node.childNodes[0] && node.childNodes[0].nodeName === 'A') {
|
||||||
|
newNode = wrapNodeAnchor(newNode.cloneNode(true), (node.childNodes[0] as HTMLAnchorElement).href);
|
||||||
|
styledNode = node.childNodes[0].childNodes[0];
|
||||||
|
}
|
||||||
|
newNode = wrapNode(styledNode as HTMLElement, newNode);
|
||||||
|
return newNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyInlineStyles(dirty: Node): Node {
|
||||||
|
const node = dirty.cloneNode(true);
|
||||||
|
let newNode = document.createTextNode(node.textContent || '') as Node;
|
||||||
|
let styledNode = node;
|
||||||
|
if (node.nodeName === 'A') {
|
||||||
|
newNode = wrapNodeAnchor(newNode, (node as HTMLAnchorElement).href);
|
||||||
|
if ('style' in node.childNodes[0] && Boolean(node.childNodes[0].style)) {
|
||||||
|
styledNode = node.childNodes[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newNode = wrapNode(styledNode as HTMLElement, newNode);
|
||||||
|
return newNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getCleanNode(node: Node): Node[] {
|
||||||
|
if (node.childNodes && (node.childNodes.length <= 1 || node.nodeName === 'OL' || node.nodeName === 'UL')) {
|
||||||
|
let newWrapper: Node | undefined;
|
||||||
|
let newNode = document.createTextNode(node.textContent || '') as Node;
|
||||||
|
if (node.nodeName === 'UL' || node.nodeName === 'OL' || node.nodeName === 'LI') {
|
||||||
|
newWrapper = document.createElement(node.nodeName);
|
||||||
|
newNode = document.createDocumentFragment();
|
||||||
|
const items = [];
|
||||||
|
for (let i = 0; i < node.childNodes.length; i++) {
|
||||||
|
items.push(...getCleanNode(node.childNodes[i]));
|
||||||
|
}
|
||||||
|
items.map((i: Node): Node => newNode.appendChild(i));
|
||||||
|
} else if (headers.indexOf(node.nodeName) !== -1) {
|
||||||
|
newWrapper = document.createElement(node.nodeName);
|
||||||
|
newNode = applyInlineStyles(node.childNodes[0]);
|
||||||
|
} else if (node.nodeName === 'P') {
|
||||||
|
newWrapper = document.createElement('p');
|
||||||
|
newNode = applyBlockStyles(node);
|
||||||
|
} else if (node.nodeName === 'BR') {
|
||||||
|
newNode = node;
|
||||||
|
} else {
|
||||||
|
newWrapper = document.createElement('span');
|
||||||
|
newNode = applyInlineStyles(node);
|
||||||
|
}
|
||||||
|
if (newWrapper) {
|
||||||
|
newWrapper.appendChild(newNode);
|
||||||
|
return [newWrapper];
|
||||||
|
}
|
||||||
|
|
||||||
|
return [node.cloneNode(true)];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node.childNodes) {
|
||||||
|
const nodes = [];
|
||||||
|
for (let i = 0; i < node.childNodes.length; i++) {
|
||||||
|
nodes.push(...getCleanNode(node.childNodes[i]));
|
||||||
|
}
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
return [node];
|
||||||
|
}
|
||||||
|
|
||||||
|
function filterNode(node: Node): boolean {
|
||||||
|
return node.nodeType !== 8; // Node.COMMENT_NODE = 8
|
||||||
|
}
|
||||||
|
|
||||||
|
function getCleanDocument(dirty: HTMLElement): HTMLElement {
|
||||||
|
const body = document.createElement('body');
|
||||||
|
const nodes = dirty.childNodes;
|
||||||
|
const filteredNodes = Array.from(nodes).filter(filterNode);
|
||||||
|
const cleanNodes = [];
|
||||||
|
|
||||||
|
for (const node of filteredNodes) {
|
||||||
|
cleanNodes.push(...getCleanNode(node));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = 0; i < cleanNodes.length; i++) {
|
||||||
|
body.appendChild(cleanNodes[i].cloneNode(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function cleanDocsHtml(clipboardContent: string): string {
|
||||||
|
if (!clipboardContent.match(GDOCS_ELEMENT_ID_REGEXP)) {
|
||||||
|
return parseHtml(clipboardContent.replace(/(\r\n|\n|\r)/, '')).innerHTML;
|
||||||
|
}
|
||||||
|
|
||||||
|
return getCleanDocument(parseHtml(clipboardContent.replace(/(\r\n|\n|\r)/, ''))).innerHTML;
|
||||||
|
}
|
||||||
@ -8,6 +8,7 @@ export const ENTITY_CLASS_BY_NODE_NAME: Record<string, ApiMessageEntityTypes> =
|
|||||||
STRONG: ApiMessageEntityTypes.Bold,
|
STRONG: ApiMessageEntityTypes.Bold,
|
||||||
I: ApiMessageEntityTypes.Italic,
|
I: ApiMessageEntityTypes.Italic,
|
||||||
EM: ApiMessageEntityTypes.Italic,
|
EM: ApiMessageEntityTypes.Italic,
|
||||||
|
INS: ApiMessageEntityTypes.Underline,
|
||||||
U: ApiMessageEntityTypes.Underline,
|
U: ApiMessageEntityTypes.Underline,
|
||||||
S: ApiMessageEntityTypes.Strike,
|
S: ApiMessageEntityTypes.Strike,
|
||||||
STRIKE: ApiMessageEntityTypes.Strike,
|
STRIKE: ApiMessageEntityTypes.Strike,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user