From a1923f203c7f29cbcc289818e301b9238eda17c9 Mon Sep 17 00:00:00 2001 From: Alexander Zinchuk Date: Wed, 27 Nov 2024 20:33:48 +0400 Subject: [PATCH] Message: Voice-To-Text for Video Message (#5201) --- src/api/gramjs/apiBuilders/messageContent.ts | 4 ++ src/api/types/messages.ts | 1 + src/components/common/Audio.tsx | 27 ++++---- src/components/middle/message/Message.tsx | 39 ++++++----- .../middle/message/MessageMeta.scss | 2 +- src/components/middle/message/RoundVideo.scss | 27 ++++++++ src/components/middle/message/RoundVideo.tsx | 64 +++++++++++++++++-- .../middle/message/_message-content.scss | 17 +++++ .../message/helpers/buildContentClassName.ts | 8 ++- src/util/generateWaveform.ts | 6 ++ 10 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 src/util/generateWaveform.ts diff --git a/src/api/gramjs/apiBuilders/messageContent.ts b/src/api/gramjs/apiBuilders/messageContent.ts index b1d7491d6..46e83bec1 100644 --- a/src/api/gramjs/apiBuilders/messageContent.ts +++ b/src/api/gramjs/apiBuilders/messageContent.ts @@ -28,6 +28,7 @@ import type { import type { UniversalMessage } from './messages'; import { SUPPORTED_PHOTO_CONTENT_TYPES, SUPPORTED_VIDEO_CONTENT_TYPES, VIDEO_WEBM_TYPE } from '../../../config'; +import { generateWaveform } from '../../../util/generateWaveform'; import { pick } from '../../../util/iteratees'; import { addMediaToLocalDb, addStoryToLocalDb, type MediaRepairContext, serializeBytes, @@ -217,6 +218,8 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo nosound, } = videoAttr; + const waveform = isRound ? generateWaveform(duration) : undefined; + return { mediaType: 'video', id: String(id), @@ -233,6 +236,7 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo isSpoiler, hasVideoPreview, previewPhotoSizes, + waveform, ...(nosound && { noSound: true }), }; } diff --git a/src/api/types/messages.ts b/src/api/types/messages.ts index b57dbee50..4cdb09d35 100644 --- a/src/api/types/messages.ts +++ b/src/api/types/messages.ts @@ -116,6 +116,7 @@ export interface ApiVideo { previewBlobUrl?: string; size: number; noSound?: boolean; + waveform?: number[]; } export interface ApiAudio { diff --git a/src/components/common/Audio.tsx b/src/components/common/Audio.tsx index 3f218b560..580560678 100644 --- a/src/components/common/Audio.tsx +++ b/src/components/common/Audio.tsx @@ -4,7 +4,9 @@ import React, { } from '../../lib/teact/teact'; import { getActions } from '../../global'; -import type { ApiAudio, ApiMessage, ApiVoice } from '../../api/types'; +import type { + ApiAudio, ApiMessage, ApiVideo, ApiVoice, +} from '../../api/types'; import type { BufferedRange } from '../../hooks/useBuffering'; import type { OldLangFn } from '../../hooks/useOldLang'; import type { ISettings } from '../../types'; @@ -118,6 +120,7 @@ const Audio: FC = ({ } = message; const audio = contentAudio || getMessageWebPageAudio(message); const media = (voice || video || audio)!; + const mediaSource = (voice || video); const isVoice = Boolean(voice || video); const isSeeking = useRef(false); // eslint-disable-next-line no-null/no-null @@ -186,7 +189,7 @@ const Audio: FC = ({ const waveformCanvasRef = useWaveformCanvas( theme, - voice, + mediaSource, (isMediaUnread && !isOwn && !isReverse) ? 1 : playProgress, isOwn, !noAvatars, @@ -462,10 +465,10 @@ const Audio: FC = ({ transferProgress, onDateClick ? handleDateClick : undefined, )} - {origin === AudioOrigin.SharedMedia && (voice || video) && renderWithTitle()} - {(origin === AudioOrigin.Inline || isInOneTimeModal) && voice && ( + {origin === AudioOrigin.SharedMedia && mediaSource && renderWithTitle()} + {(origin === AudioOrigin.Inline || isInOneTimeModal || isTranscribed) && mediaSource && ( renderVoice( - voice, + mediaSource, seekerRef, waveformCanvasRef, hasTtl ? reversePlayProgress : playProgress, @@ -553,7 +556,7 @@ function renderAudio( } function renderVoice( - voice: ApiVoice, + media: ApiVoice | ApiVideo, seekerRef: React.Ref, waveformCanvasRef: React.Ref, playProgress: number, @@ -604,7 +607,7 @@ function renderVoice( stroke-linejoin="round" rx="6" ry="6" - stroke="var(--accent-color)" + stroke="white" stroke-dashoffset="1" stroke-dasharray="32,68" /> @@ -618,7 +621,7 @@ function renderVoice( dir="auto" > {playProgress === 0 || playProgress === 1 - ? formatMediaDuration(voice.duration) : formatMediaDuration(voice.duration * playProgress)} + ? formatMediaDuration(media!.duration) : formatMediaDuration(media!.duration * playProgress)}

); @@ -626,7 +629,7 @@ function renderVoice( function useWaveformCanvas( theme: ISettings['theme'], - voice?: ApiVoice, + media?: ApiVoice | ApiVideo, playProgress = 0, isOwn = false, withAvatar = false, @@ -637,11 +640,11 @@ function useWaveformCanvas( const canvasRef = useRef(null); const { data: spikes, peak } = useMemo(() => { - if (!voice) { + if (!media) { return undefined; } - const { waveform, duration } = voice; + const { waveform, duration } = media; if (!waveform) { return { data: new Array(Math.min(duration, MAX_EMPTY_WAVEFORM_POINTS)).fill(0), @@ -655,7 +658,7 @@ function useWaveformCanvas( const decodedWaveform = decodeWaveform(new Uint8Array(waveform)); return interpolateArray(decodedWaveform, spikesCount); - }, [isMobile, voice, withAvatar]) || {}; + }, [isMobile, media, withAvatar]) || {}; useLayoutEffect(() => { const canvas = canvasRef.current; diff --git a/src/components/middle/message/Message.tsx b/src/components/middle/message/Message.tsx index 9cf7c0806..8131ec871 100644 --- a/src/components/middle/message/Message.tsx +++ b/src/components/middle/message/Message.tsx @@ -115,7 +115,6 @@ import { calculateDimensionsForMessageMedia, getStickerDimensions, REM, - ROUND_VIDEO_DIMENSIONS_PX, } from '../../common/helpers/mediaDimensions'; import { getPeerColorClass } from '../../common/helpers/peerColor'; import renderText from '../../common/helpers/renderText'; @@ -520,9 +519,11 @@ const Message: FC = ({ const messageReplyInfo = getMessageReplyInfo(message); const storyReplyInfo = getStoryReplyInfo(message); + const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText)); + const hasStoryReply = Boolean(storyReplyInfo); const hasThread = Boolean(repliesThreadInfo) && messageListType === 'thread'; - const isCustomShape = getMessageCustomShape(message); + const isCustomShape = !withVoiceTranscription && getMessageCustomShape(message); const hasAnimatedEmoji = isCustomShape && (animatedEmoji || animatedCustomEmoji); const hasReactions = reactionMessage?.reactions && !areReactionsEmpty(reactionMessage.reactions); const asForwarded = ( @@ -558,8 +559,6 @@ const Message: FC = ({ && forwardInfo.fromMessageId )); - const noUserColors = isOwn && !isCustomShape; - const hasFactCheck = Boolean(factCheck?.text); const hasForwardedCustomShape = asForwarded && isCustomShape; @@ -575,7 +574,8 @@ const Message: FC = ({ }); const messageSender = canShowSender ? sender : undefined; - const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText)); + + const noUserColors = isOwn && !isCustomShape; const shouldPreferOriginSender = forwardInfo && (isChatWithSelf || isRepliesChat || isAnonymousForwards || !messageSender); @@ -760,7 +760,7 @@ const Message: FC = ({ const withQuickReactionButton = !isTouchScreen && !phoneCall && !isInSelectMode && defaultReaction && !isInDocumentGroupNotLast && !isStoryMention && !hasTtl; - const hasOutsideReactions = hasReactions + const hasOutsideReactions = !withVoiceTranscription && hasReactions && (isCustomShape || ((photo || video || storyData || (location?.mediaType === 'geo')) && !hasText)); const contentClassName = buildContentClassName(message, album, { @@ -904,20 +904,11 @@ const Message: FC = ({ if (!isAlbum && (photo || video || invoice?.extendedMedia)) { let width: number | undefined; - if (photo) { - width = calculateMediaDimensions({ - media: photo, - isOwn, - asForwarded, - noAvatars, - isMobile, - }).width; - } else if (video) { - if (isRoundVideo) { - width = ROUND_VIDEO_DIMENSIONS_PX; - } else { + if (photo || video) { + const media = (photo || video); + if (media && !isRoundVideo) { width = calculateMediaDimensions({ - media: video, + media, isOwn, asForwarded, noAvatars, @@ -1193,16 +1184,22 @@ const Message: FC = ({ chatId={chatId} /> )} - {!isAlbum && isRoundVideo && ( + {!isAlbum && isRoundVideo && !withVoiceTranscription && ( )} - {(audio || voice) && ( + {(audio || voice || withVoiceTranscription) && (