331 lines
9.2 KiB
TypeScript
331 lines
9.2 KiB
TypeScript
import type { ApiFormattedText, ApiMessageEntity } from '../api/types';
|
|
import { ApiMessageEntityTypes } from '../api/types';
|
|
|
|
import { RE_LINK_TEMPLATE } from '../config';
|
|
import { IS_EMOJI_SUPPORTED } from './browser/windowEnvironment';
|
|
|
|
/**
 * Lookup table from a DOM `nodeName` to the message entity type that the tag represents.
 * Several tags may share one entity type (e.g. `B` and `STRONG`).
 */
export const ENTITY_CLASS_BY_NODE_NAME: Record<string, ApiMessageEntityTypes> = {
  // Bold
  B: ApiMessageEntityTypes.Bold,
  STRONG: ApiMessageEntityTypes.Bold,

  // Italic
  I: ApiMessageEntityTypes.Italic,
  EM: ApiMessageEntityTypes.Italic,

  // Underline
  INS: ApiMessageEntityTypes.Underline,
  U: ApiMessageEntityTypes.Underline,

  // Strikethrough
  S: ApiMessageEntityTypes.Strike,
  STRIKE: ApiMessageEntityTypes.Strike,
  DEL: ApiMessageEntityTypes.Strike,

  // Code, preformatted blocks and quotes
  CODE: ApiMessageEntityTypes.Code,
  PRE: ApiMessageEntityTypes.Pre,
  BLOCKQUOTE: ApiMessageEntityTypes.Blockquote,
};

/** Limit applied by `parseHtmlAsFormattedText` when descending into nested nodes. */
const MAX_TAG_DEEPNESS = 3;
|
|
|
|
/**
 * Converts an HTML string into plain text plus the message entities describing its formatting.
 *
 * @param html Markup to parse.
 * @param withMarkdownLinks When `true`, markdown links (`[text](link)`) are converted to anchors
 * before the remaining markdown is processed. Has no effect when `skipMarkdown` is set.
 * @param skipMarkdown When `true`, `html` is parsed as is, without any markdown substitution.
 * @returns The trimmed text with zero-width spaces removed, and the collected entities
 * (`undefined` when none were found).
 */
export default function parseHtmlAsFormattedText(
  html: string, withMarkdownLinks = false, skipMarkdown = false,
): ApiFormattedText {
  const fragment = document.createElement('div');
  fragment.innerHTML = skipMarkdown ? html
    : withMarkdownLinks ? parseMarkdown(parseMarkdownLinks(html)) : parseMarkdown(html);
  fixImageContent(fragment);
  const text = fragment.innerText.trim().replace(/\u200b+/g, '');
  // Position of the first kept character inside the untrimmed text.
  // Guarded because `text[0]` is `undefined` for empty text and `indexOf(undefined)`
  // would look for the literal string "undefined".
  const trimShift = text ? fragment.innerText.indexOf(text[0]) : 0;
  let textIndex = -trimShift;
  const entities: ApiMessageEntity[] = [];

  // `deepness` is the nesting level of `node` (top-level nodes are at level 1). It is passed
  // down explicitly so that siblings do not consume each other's depth budget.
  function addEntity(node: ChildNode, deepness: number) {
    if (node.nodeType === Node.COMMENT_NODE) return;
    const { index, entity } = getEntityDataFromNode(node, text, textIndex);

    if (entity) {
      textIndex = index;
      entities.push(entity);
    } else if (node.textContent) {
      // Skip newlines on the beginning
      if (index === 0 && node.textContent.trim() === '') {
        return;
      }
      textIndex += node.textContent.length;
    }

    if (node.hasChildNodes() && deepness <= MAX_TAG_DEEPNESS) {
      Array.from(node.childNodes).forEach((child) => addEntity(child, deepness + 1));
    }
  }

  Array.from(fragment.childNodes).forEach((node) => addEntity(node, 1));

  return {
    text,
    entities: entities.length ? entities : undefined,
  };
}
|
|
|
|
/**
 * Gives every `<img>` inside `fragment` a textual form based on its `alt` attribute.
 *
 * Images carrying `data-document-id` (custom emoji) stay in place and receive the alt text as
 * their text content; all other images (regular emoji with an image fallback) are replaced by
 * the alt text itself. A missing `alt` is treated as an empty string.
 *
 * @param fragment Container whose images are rewritten in place.
 */
export function fixImageContent(fragment: HTMLDivElement) {
  const images = fragment.querySelectorAll('img');

  images.forEach((image) => {
    const altText = image.alt || '';
    const isCustomEmoji = Boolean(image.dataset.documentId);

    if (!isCustomEmoji) {
      // Regular emoji with image fallback
      image.replaceWith(altText);
      return;
    }

    // Custom emoji
    image.textContent = altText;
  });
}
|
|
|
|
/**
 * Rewrites the markdown-like syntax found in an HTML string into HTML tags.
 *
 * Steps, in order: normalize non-breaking spaces, turn `<br>` and `<div>` structure into
 * newlines, then convert fenced blocks to `<pre>`, backticks to `<code>`, custom emoji links
 * to `<img data-document-id>`, and `**`, `__`, `~~`, `||` pairs to bold, italic, strike and
 * spoiler markup.
 *
 * @param html Markup to process.
 * @returns The processed markup.
 */
function parseMarkdown(html: string) {
  let parsedHtml = html;

  // Strip redundant nbsp's (both the serialized `&nbsp;` entity and the raw U+00A0 character)
  parsedHtml = parsedHtml.replace(/&nbsp;|\u00a0/g, ' ');

  // Replace <div><br></div> with newline (new line in Safari)
  parsedHtml = parsedHtml.replace(/<div><br([^>]*)?><\/div>/g, '\n');
  // Replace <br> with newline
  parsedHtml = parsedHtml.replace(/<br([^>]*)?>/g, '\n');

  // Strip redundant <div> tags
  parsedHtml = parsedHtml.replace(/<\/div>(\s*)<div>/g, '\n');
  parsedHtml = parsedHtml.replace(/<div>/g, '\n');
  parsedHtml = parsedHtml.replace(/<\/div>/g, '');

  // Pre
  parsedHtml = parsedHtml.replace(/^`{3}(.*?)[\n\r](.*?[\n\r]?)`{3}/gms, '<pre data-language="$1">$2</pre>');
  parsedHtml = parsedHtml.replace(/^`{3}[\n\r]?(.*?)[\n\r]?`{3}/gms, '<pre>$1</pre>');
  parsedHtml = parsedHtml.replace(/[`]{3}([^`]+)[`]{3}/g, '<pre>$1</pre>');

  // Code
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[`]{1}([^`\n]+)[`]{1}(?![^<]*<\/(code|pre)>)/g,
    '<code>$2</code>',
  );

  // Custom Emoji markdown tag
  if (!IS_EMOJI_SUPPORTED) {
    // Prepare alt text for custom emoji
    parsedHtml = parsedHtml.replace(/\[<img[^>]+alt="([^"]+)"[^>]*>]/gm, '[$1]');
  }
  parsedHtml = parsedHtml.replace(
    /(?!<(?:code|pre)[^<]*|<\/)\[([^\]\n]+)\]\(customEmoji:(\d+)\)(?![^<]*<\/(?:code|pre)>)/g,
    '<img alt="$1" data-document-id="$2">',
  );

  // Other simple markdown
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[*]{2}([^*\n]+)[*]{2}(?![^<]*<\/(code|pre)>)/g,
    '<b>$2</b>',
  );
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[_]{2}([^_\n]+)[_]{2}(?![^<]*<\/(code|pre)>)/g,
    '<i>$2</i>',
  );
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[~]{2}([^~\n]+)[~]{2}(?![^<]*<\/(code|pre)>)/g,
    '<s>$2</s>',
  );
  parsedHtml = parsedHtml.replace(
    /(?!<(code|pre)[^<]*|<\/)[|]{2}([^|\n]+)[|]{2}(?![^<]*<\/(code|pre)>)/g,
    `<span data-entity-type="${ApiMessageEntityTypes.Spoiler}">$2</span>`,
  );

  return parsedHtml;
}
|
|
|
|
/**
 * Converts markdown links of the form `[label](target)` into `<a>` tags.
 *
 * A target containing `://` is used as the href unchanged; otherwise a target containing `@`
 * gets a `mailto:` prefix, and anything else gets an `https://` prefix.
 *
 * @param html Markup that may contain markdown links.
 * @returns The markup with every matching link replaced by an anchor.
 */
function parseMarkdownLinks(html: string) {
  const markdownLinkRe = new RegExp(`\\[([^\\]]+?)]\\((${RE_LINK_TEMPLATE}+?)\\)`, 'g');

  return html.replace(markdownLinkRe, (_match, label, target) => {
    let href: string;
    if (target.includes('://')) {
      href = target;
    } else if (target.includes('@')) {
      href = `mailto:${target}`;
    } else {
      href = `https://${target}`;
    }

    return `<a href="${href}">${label}</a>`;
  });
}
|
|
|
|
/**
 * Builds the message entity that a DOM node stands for, located inside `rawText`.
 *
 * @param node Node to inspect.
 * @param rawText Full plain text that entity offsets refer to.
 * @param textIndex Position in `rawText` from which the node's text is searched.
 * @returns `index`, the position where the node's text was found (or `textIndex` when it was
 * not found or the node yields no entity type), and `entity`, the resulting entity or
 * `undefined` when the node does not produce one.
 */
function getEntityDataFromNode(
  node: ChildNode,
  rawText: string,
  textIndex: number,
): { index: number; entity?: ApiMessageEntity } {
  const type = getEntityTypeFromNode(node);
  const content = node.textContent;

  if (!type || !content) {
    return { index: textIndex, entity: undefined };
  }

  const foundAt = rawText.indexOf(content, textIndex);
  // In some cases, last text entity ends with a newline (which gets trimmed from `rawText`).
  // In this case, `foundAt` would be `-1`, so we use `textIndex` instead.
  const index = foundAt >= 0 ? foundAt : textIndex;
  // `substring` clamps its arguments, which keeps offset and length inside `rawText`
  const offset = rawText.substring(0, index).length;
  const { length } = rawText.substring(index, index + content.length);

  const element = node as HTMLElement;
  const withoutEntity = { index, entity: undefined };

  switch (type) {
    case ApiMessageEntityTypes.TextUrl:
      return {
        index,
        entity: { type, offset, length, url: (node as HTMLAnchorElement).href },
      };

    case ApiMessageEntityTypes.MentionName:
      return {
        index,
        entity: { type, offset, length, userId: (node as HTMLAnchorElement).dataset.userId! },
      };

    case ApiMessageEntityTypes.Pre:
      return {
        index,
        entity: { type, offset, length, language: (node as HTMLPreElement).dataset.language },
      };

    case ApiMessageEntityTypes.CustomEmoji: {
      const documentId = element.dataset.documentId || element.getAttribute('emoji-id');
      if (!documentId) return withoutEntity;

      return {
        index,
        entity: { type, offset, length, documentId },
      };
    }

    case ApiMessageEntityTypes.Timestamp: {
      const timestamp = Number(element.dataset.timestamp);
      if (Number.isNaN(timestamp)) return withoutEntity;

      return {
        index,
        entity: { type, offset, length, timestamp },
      };
    }

    case ApiMessageEntityTypes.FormattedDate: {
      const date = Number(element.dataset.unix);
      if (Number.isNaN(date)) return withoutEntity;

      const format = element.dataset.format;
      // Each flag is `true` when its letter is present in `data-format`, otherwise `undefined`
      const hasFlag = (letter: string) => format?.includes(letter) || undefined;

      return {
        index,
        entity: {
          type,
          offset,
          length,
          date,
          relative: hasFlag('r'),
          dayOfWeek: hasFlag('w'),
          shortDate: hasFlag('d'),
          longDate: hasFlag('D'),
          shortTime: hasFlag('t'),
          longTime: hasFlag('T'),
        },
      };
    }

    // Diff markers never produce an entity
    case ApiMessageEntityTypes.DiffInsert:
    case ApiMessageEntityTypes.DiffReplace:
    case ApiMessageEntityTypes.DiffDelete:
      return withoutEntity;

    default:
      return {
        index,
        entity: { type, offset, length },
      };
  }
}
|
|
|
|
/**
 * Determines which message entity type, if any, a DOM node represents.
 *
 * Checks, in order: an explicit `data-entity-type` attribute, the tag-name table
 * `ENTITY_CLASS_BY_NODE_NAME`, anchor-specific rules, `SPAN`, `IMG` with `data-document-id`,
 * and the `TG-TIME` / `TG-EMOJI` tags.
 *
 * @param node Node to classify.
 * @returns The entity type, or `undefined` when the node does not map to one.
 */
function getEntityTypeFromNode(node: ChildNode): ApiMessageEntityTypes | undefined {
  if (node instanceof HTMLElement && node.dataset.entityType) {
    return node.dataset.entityType as ApiMessageEntityTypes;
  }

  // Own-property lookup only: names of elements in foreign (SVG/MathML) content keep their
  // case, so a name such as `constructor` would otherwise resolve to an inherited
  // `Object.prototype` member and be returned as an entity type.
  const mappedType = Object.prototype.hasOwnProperty.call(ENTITY_CLASS_BY_NODE_NAME, node.nodeName)
    ? ENTITY_CLASS_BY_NODE_NAME[node.nodeName]
    : undefined;
  if (mappedType) {
    return mappedType;
  }

  if (node.nodeName === 'A') {
    const anchor = node as HTMLAnchorElement;
    if (anchor.dataset.entityType === ApiMessageEntityTypes.MentionName) {
      return ApiMessageEntityTypes.MentionName;
    }
    if (anchor.dataset.entityType === ApiMessageEntityTypes.Url) {
      return ApiMessageEntityTypes.Url;
    }
    if (anchor.href.startsWith('mailto:')) {
      return ApiMessageEntityTypes.Email;
    }
    if (anchor.href.startsWith('tel:')) {
      return ApiMessageEntityTypes.Phone;
    }
    // A link whose visible text differs from its target is a text link
    if (anchor.href !== anchor.textContent) {
      return ApiMessageEntityTypes.TextUrl;
    }

    return ApiMessageEntityTypes.Url;
  }

  if (node.nodeName === 'SPAN') {
    return (node as HTMLElement).dataset.entityType as ApiMessageEntityTypes | undefined;
  }

  if (node.nodeName === 'IMG') {
    if ((node as HTMLImageElement).dataset.documentId) {
      return ApiMessageEntityTypes.CustomEmoji;
    }
  }

  if (node.nodeName === 'TG-TIME') {
    return ApiMessageEntityTypes.FormattedDate;
  }

  if (node.nodeName === 'TG-EMOJI') {
    return ApiMessageEntityTypes.CustomEmoji;
  }

  return undefined;
}
|