import type { ApiFormattedText, ApiMessageEntity } from '../api/types';
import { ApiMessageEntityTypes } from '../api/types';

import { RE_LINK_TEMPLATE } from '../config';
import { IS_EMOJI_SUPPORTED } from './browser/windowEnvironment';

/**
 * Maps an element's `nodeName` to the entity type that element produces.
 * Elements not listed here are resolved by the extra checks in `getEntityTypeFromNode`.
 */
export const ENTITY_CLASS_BY_NODE_NAME: Record<string, ApiMessageEntityTypes> = {
  B: ApiMessageEntityTypes.Bold,
  STRONG: ApiMessageEntityTypes.Bold,
  I: ApiMessageEntityTypes.Italic,
  EM: ApiMessageEntityTypes.Italic,
  INS: ApiMessageEntityTypes.Underline,
  U: ApiMessageEntityTypes.Underline,
  S: ApiMessageEntityTypes.Strike,
  STRIKE: ApiMessageEntityTypes.Strike,
  DEL: ApiMessageEntityTypes.Strike,
  CODE: ApiMessageEntityTypes.Code,
  PRE: ApiMessageEntityTypes.Pre,
  BLOCKQUOTE: ApiMessageEntityTypes.Blockquote,
};

// Upper bound used by the entity walker to stop descending into child nodes.
const MAX_TAG_DEEPNESS = 3;

/**
 * Converts an HTML string into plain text plus a list of formatting entities.
 *
 * The HTML is loaded into a detached `<div>` (so a DOM `document` is required), images are
 * replaced by their alt text, and the resulting node tree is walked to collect entities whose
 * offsets refer to the trimmed `innerText` of that `<div>` (with zero-width spaces removed).
 *
 * @param html Source HTML.
 * @param withMarkdownLinks When `true`, `[text](link)` markdown links are converted to anchors
 *   before the rest of the markdown is processed. Ignored when `skipMarkdown` is `true`.
 * @param skipMarkdown When `true`, `html` is used as is, without any markdown processing.
 * @returns The extracted text and its entities; `entities` is `undefined` when none were found.
 */
export default function parseHtmlAsFormattedText(
  html: string, withMarkdownLinks = false, skipMarkdown = false,
): ApiFormattedText {
  const fragment = document.createElement('div');
  fragment.innerHTML = skipMarkdown ? html
    : withMarkdownLinks ? parseMarkdown(parseMarkdownLinks(html)) : parseMarkdown(html);
  fixImageContent(fragment);

  const text = fragment.innerText.trim().replace(/\u200b+/g, '');
  // Number of leading characters removed by `trim()`; the walker starts at a negative index
  // so that the text it accumulates lines up with the trimmed `text`.
  const trimShift = fragment.innerText.indexOf(text[0]);
  let textIndex = -trimShift;
  let recursionDeepness = 0;
  const entities: ApiMessageEntity[] = [];

  function addEntity(node: ChildNode) {
    if (node.nodeType === Node.COMMENT_NODE) return;

    const { index, entity } = getEntityDataFromNode(node, text, textIndex);

    if (entity) {
      // Children of this node are visited next, so the cursor stays at the entity start
      textIndex = index;
      entities.push(entity);
    } else if (node.textContent) {
      // Skip newlines on the beginning
      if (index === 0 && node.textContent.trim() === '') {
        return;
      }
      textIndex += node.textContent.length;
    }

    // NOTE(review): `recursionDeepness` is reset only for each top-level child and is never
    // decremented, so it counts nodes descended into within that child rather than the
    // true nesting depth. Kept as is to preserve existing behavior.
    if (node.hasChildNodes() && recursionDeepness <= MAX_TAG_DEEPNESS) {
      recursionDeepness += 1;
      Array.from(node.childNodes).forEach(addEntity);
    }
  }

  Array.from(fragment.childNodes).forEach((node) => {
    recursionDeepness = 1;
    addEntity(node);
  });

  return {
    text,
    entities: entities.length ? entities : undefined,
  };
}

/**
 * Replaces the images inside `fragment` with their alt text. Mutates `fragment` in place.
 *
 * Images carrying `data-document-id` are kept as elements (their text content becomes the
 * alt text); every other image is replaced by a plain text node with its alt text.
 */
export function fixImageContent(fragment: HTMLDivElement) {
  fragment.querySelectorAll('img').forEach((node) => {
    if (node.dataset.documentId) { // Custom Emoji
      node.textContent = node.alt || '';
    } else { // Regular emoji with image fallback
      node.replaceWith(node.alt || '');
    }
  });
}

/**
 * Normalizes line-break markup and converts the supported markdown syntax into HTML tags
 * that the entity walker recognizes.
 *
 * NOTE(review): the markup inside the patterns and replacement strings below was restored
 * from the accompanying comments and from the tag handlers in this file (`PRE` reads
 * `data-language`, `IMG` reads `data-document-id`, `SPAN` reads `data-entity-type`) —
 * confirm against version control.
 */
function parseMarkdown(html: string) {
  let parsedHtml = html.slice(0);

  // Strip redundant nbsp's
  parsedHtml = parsedHtml.replace(/&nbsp;/g, ' ');

  // Replace <div><br></div> with newline (new line in Safari)
  parsedHtml = parsedHtml.replace(/<div><br([^>]*)?><\/div>/g, '\n');
  // Replace <br> with newline
  parsedHtml = parsedHtml.replace(/<br([^>]*)?>/g, '\n');

  // Strip redundant <div> tags
  parsedHtml = parsedHtml.replace(/<\/div>(\s*)<div>/g, '\n');
  parsedHtml = parsedHtml.replace(/<div>/g, '\n');
  parsedHtml = parsedHtml.replace(/<\/div>/g, '');

  // Pre: fenced block with a language line, fenced block without one, then inline triple backticks
  parsedHtml = parsedHtml.replace(/^`{3}(.*?)[\n\r](.*?[\n\r]?)`{3}/gms, '<pre data-language="$1">$2</pre>');
  parsedHtml = parsedHtml.replace(/^`{3}[\n\r]?(.*?)[\n\r]?`{3}/gms, '<pre>$1</pre>');
  parsedHtml = parsedHtml.replace(/[`]{3}([^`]+)[`]{3}/g, '<pre>$1</pre>');

  // Code
  // The lookaheads here and in the patterns below are meant to leave text that already
  // sits inside <code>/<pre> untouched.
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[`]{1}([^`\n]+)[`]{1}(?![^<]*<\/(code|pre)>)/g,
    '<code>$2</code>',
  );

  // Custom Emoji markdown tag
  if (!IS_EMOJI_SUPPORTED) {
    // Prepare alt text for custom emoji
    parsedHtml = parsedHtml.replace(/\[<img[^>]+alt="([^"]+)"[^>]*>]/gm, '[$1]');
  }
  parsedHtml = parsedHtml.replace(
    /(?!<(?:code|pre)[^<]*|<\/)\[([^\]\n]+)\]\(customEmoji:(\d+)\)(?![^<]*<\/(?:code|pre)>)/g,
    '<img alt="$1" data-document-id="$2">',
  );

  // Other simple markdown
  // **bold**
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[*]{2}([^*\n]+)[*]{2}(?![^<]*<\/(code|pre)>)/g,
    '<b>$2</b>',
  );
  // __italic__
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[_]{2}([^_\n]+)[_]{2}(?![^<]*<\/(code|pre)>)/g,
    '<i>$2</i>',
  );
  // ~~strike~~
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[~]{2}([^~\n]+)[~]{2}(?![^<]*<\/(code|pre)>)/g,
    '<s>$2</s>',
  );
  // ||spoiler||
  // NOTE(review): assumes `ApiMessageEntityTypes.Spoiler` is the enum member for this syntax — confirm.
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[|]{2}([^|\n]+)[|]{2}(?![^<]*<\/(code|pre)>)/g,
    `<span data-entity-type="${ApiMessageEntityTypes.Spoiler}">$2</span>`,
  );

  return parsedHtml;
}

/**
 * Converts `[text](link)` markdown links into anchors.
 *
 * Links without a scheme get `mailto:` when they contain `@` and `https://` otherwise.
 *
 * NOTE(review): the anchor markup was restored from the `A` node handling in
 * `getEntityTypeFromNode` / `getEntityDataFromNode` — confirm against version control.
 */
function parseMarkdownLinks(html: string) {
  return html.replace(new RegExp(`\\[([^\\]]+?)]\\((${RE_LINK_TEMPLATE}+?)\\)`, 'g'), (_, text, link) => {
    const url = link.includes('://') ? link
      : link.includes('@') ? `mailto:${link}` : `https://${link}`;
    return `<a href="${url}">${text}</a>`;
  });
}

/**
 * Builds the entity described by `node`, if any.
 *
 * @param node Node to inspect.
 * @param rawText Final (trimmed) message text that entity offsets refer to.
 * @param textIndex Current cursor position within `rawText`.
 * @returns `index` — the position in `rawText` where the node's text was located (or
 *   `textIndex` when it could not be located); `entity` — the entity, or `undefined` when the
 *   node has no entity type, has no text, lacks required data, or is a diff marker.
 */
function getEntityDataFromNode(
  node: ChildNode,
  rawText: string,
  textIndex: number,
): { index: number; entity?: ApiMessageEntity } {
  const type = getEntityTypeFromNode(node);

  if (!type || !node.textContent) {
    return {
      index: textIndex,
      entity: undefined,
    };
  }

  const rawIndex = rawText.indexOf(node.textContent, textIndex);
  // In some cases, last text entity ends with a newline (which gets trimmed from `rawText`).
  // In this case, `rawIndex` would return `-1`, so we use `textIndex` instead.
  const index = rawIndex >= 0 ? rawIndex : textIndex;
  const offset = rawText.substring(0, index).length;
  // Clamped by `substring`, so the length never runs past the end of `rawText`
  const { length } = rawText.substring(index, index + node.textContent.length);

  if (type === ApiMessageEntityTypes.TextUrl) {
    return {
      index,
      entity: {
        type,
        offset,
        length,
        url: (node as HTMLAnchorElement).href,
      },
    };
  }

  if (type === ApiMessageEntityTypes.MentionName) {
    return {
      index,
      entity: {
        type,
        offset,
        length,
        userId: (node as HTMLAnchorElement).dataset.userId!,
      },
    };
  }

  if (type === ApiMessageEntityTypes.Pre) {
    return {
      index,
      entity: {
        type,
        offset,
        length,
        language: (node as HTMLPreElement).dataset.language,
      },
    };
  }

  if (type === ApiMessageEntityTypes.CustomEmoji) {
    const nodeElement = node as HTMLElement;
    // `data-document-id` first, then the `emoji-id` attribute as a fallback
    const documentId = nodeElement.dataset.documentId || nodeElement.getAttribute('emoji-id');
    if (!documentId) {
      return {
        index,
        entity: undefined,
      };
    }

    return {
      index,
      entity: {
        type,
        offset,
        length,
        documentId,
      },
    };
  }

  if (type === ApiMessageEntityTypes.Timestamp) {
    const timestamp = Number((node as HTMLElement).dataset.timestamp);
    if (Number.isNaN(timestamp)) {
      return {
        index,
        entity: undefined,
      };
    }

    return {
      index,
      entity: {
        type,
        offset,
        length,
        timestamp,
      },
    };
  }

  if (type === ApiMessageEntityTypes.FormattedDate) {
    const date = Number((node as HTMLElement).dataset.unix);
    if (Number.isNaN(date)) {
      return {
        index,
        entity: undefined,
      };
    }

    // Each flag is `true` when its letter is present in `data-format` and `undefined` otherwise
    const format = (node as HTMLElement).dataset.format;
    const relative = format?.includes('r') || undefined;
    const dayOfWeek = format?.includes('w') || undefined;
    const shortDate = format?.includes('d') || undefined;
    const longDate = format?.includes('D') || undefined;
    const shortTime = format?.includes('t') || undefined;
    const longTime = format?.includes('T') || undefined;

    return {
      index,
      entity: {
        type, offset, length, date, relative, dayOfWeek, shortDate, longDate, shortTime, longTime,
      },
    };
  }

  // Diff markers never produce an entity
  if (type === ApiMessageEntityTypes.DiffInsert
    || type === ApiMessageEntityTypes.DiffReplace
    || type === ApiMessageEntityTypes.DiffDelete) {
    return {
      index,
      entity: undefined,
    };
  }

  return {
    index,
    entity: {
      type,
      offset,
      length,
    },
  };
}

/**
 * Resolves the entity type represented by `node`.
 *
 * An explicit `data-entity-type` on an `HTMLElement` wins; otherwise the type is derived from
 * the node name (and, for anchors, from the `href`).
 *
 * @returns The entity type, or `undefined` when the node does not represent an entity.
 */
function getEntityTypeFromNode(node: ChildNode): ApiMessageEntityTypes | undefined {
  if (node instanceof HTMLElement && node.dataset.entityType) {
    return node.dataset.entityType as ApiMessageEntityTypes;
  }

  if (ENTITY_CLASS_BY_NODE_NAME[node.nodeName]) {
    return ENTITY_CLASS_BY_NODE_NAME[node.nodeName];
  }

  if (node.nodeName === 'A') {
    const anchor = node as HTMLAnchorElement;
    if (anchor.dataset.entityType === ApiMessageEntityTypes.MentionName) {
      return ApiMessageEntityTypes.MentionName;
    }
    if (anchor.dataset.entityType === ApiMessageEntityTypes.Url) {
      return ApiMessageEntityTypes.Url;
    }
    if (anchor.href.startsWith('mailto:')) {
      return ApiMessageEntityTypes.Email;
    }
    if (anchor.href.startsWith('tel:')) {
      return ApiMessageEntityTypes.Phone;
    }
    // A link whose visible text differs from its target is a text URL
    if (anchor.href !== anchor.textContent) {
      return ApiMessageEntityTypes.TextUrl;
    }

    return ApiMessageEntityTypes.Url;
  }

  if (node.nodeName === 'SPAN') {
    return (node as HTMLElement).dataset.entityType as ApiMessageEntityTypes | undefined;
  }

  if (node.nodeName === 'IMG') {
    if ((node as HTMLImageElement).dataset.documentId) {
      return ApiMessageEntityTypes.CustomEmoji;
    }
  }

  if (node.nodeName === 'TG-TIME') {
    return ApiMessageEntityTypes.FormattedDate;
  }

  if (node.nodeName === 'TG-EMOJI') {
    return ApiMessageEntityTypes.CustomEmoji;
  }

  return undefined;
}