From a1923f203c7f29cbcc289818e301b9238eda17c9 Mon Sep 17 00:00:00 2001
From: Alexander Zinchuk
Date: Wed, 27 Nov 2024 20:33:48 +0400
Subject: [PATCH] Message: Voice-To-Text for Video Message (#5201)
---
src/api/gramjs/apiBuilders/messageContent.ts | 4 ++
src/api/types/messages.ts | 1 +
src/components/common/Audio.tsx | 27 ++++----
src/components/middle/message/Message.tsx | 39 ++++++-----
.../middle/message/MessageMeta.scss | 2 +-
src/components/middle/message/RoundVideo.scss | 27 ++++++++
src/components/middle/message/RoundVideo.tsx | 64 +++++++++++++++++--
.../middle/message/_message-content.scss | 17 +++++
.../message/helpers/buildContentClassName.ts | 8 ++-
src/util/generateWaveform.ts | 6 ++
10 files changed, 154 insertions(+), 41 deletions(-)
create mode 100644 src/util/generateWaveform.ts
diff --git a/src/api/gramjs/apiBuilders/messageContent.ts b/src/api/gramjs/apiBuilders/messageContent.ts
index b1d7491d6..46e83bec1 100644
--- a/src/api/gramjs/apiBuilders/messageContent.ts
+++ b/src/api/gramjs/apiBuilders/messageContent.ts
@@ -28,6 +28,7 @@ import type {
import type { UniversalMessage } from './messages';
import { SUPPORTED_PHOTO_CONTENT_TYPES, SUPPORTED_VIDEO_CONTENT_TYPES, VIDEO_WEBM_TYPE } from '../../../config';
+import { generateWaveform } from '../../../util/generateWaveform';
import { pick } from '../../../util/iteratees';
import {
addMediaToLocalDb, addStoryToLocalDb, type MediaRepairContext, serializeBytes,
@@ -217,6 +218,8 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo
nosound,
} = videoAttr;
+ const waveform = isRound ? generateWaveform(duration) : undefined;
+
return {
mediaType: 'video',
id: String(id),
@@ -233,6 +236,7 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo
isSpoiler,
hasVideoPreview,
previewPhotoSizes,
+ waveform,
...(nosound && { noSound: true }),
};
}
diff --git a/src/api/types/messages.ts b/src/api/types/messages.ts
index b57dbee50..4cdb09d35 100644
--- a/src/api/types/messages.ts
+++ b/src/api/types/messages.ts
@@ -116,6 +116,7 @@ export interface ApiVideo {
previewBlobUrl?: string;
size: number;
noSound?: boolean;
+ waveform?: number[];
}
export interface ApiAudio {
diff --git a/src/components/common/Audio.tsx b/src/components/common/Audio.tsx
index 3f218b560..580560678 100644
--- a/src/components/common/Audio.tsx
+++ b/src/components/common/Audio.tsx
@@ -4,7 +4,9 @@ import React, {
} from '../../lib/teact/teact';
import { getActions } from '../../global';
-import type { ApiAudio, ApiMessage, ApiVoice } from '../../api/types';
+import type {
+ ApiAudio, ApiMessage, ApiVideo, ApiVoice,
+} from '../../api/types';
import type { BufferedRange } from '../../hooks/useBuffering';
import type { OldLangFn } from '../../hooks/useOldLang';
import type { ISettings } from '../../types';
@@ -118,6 +120,7 @@ const Audio: FC = ({
} = message;
const audio = contentAudio || getMessageWebPageAudio(message);
const media = (voice || video || audio)!;
+ const mediaSource = (voice || video);
const isVoice = Boolean(voice || video);
const isSeeking = useRef(false);
// eslint-disable-next-line no-null/no-null
@@ -186,7 +189,7 @@ const Audio: FC = ({
const waveformCanvasRef = useWaveformCanvas(
theme,
- voice,
+ mediaSource,
(isMediaUnread && !isOwn && !isReverse) ? 1 : playProgress,
isOwn,
!noAvatars,
@@ -462,10 +465,10 @@ const Audio: FC = ({
transferProgress,
onDateClick ? handleDateClick : undefined,
)}
- {origin === AudioOrigin.SharedMedia && (voice || video) && renderWithTitle()}
- {(origin === AudioOrigin.Inline || isInOneTimeModal) && voice && (
+ {origin === AudioOrigin.SharedMedia && mediaSource && renderWithTitle()}
+ {(origin === AudioOrigin.Inline || isInOneTimeModal || isTranscribed) && mediaSource && (
renderVoice(
- voice,
+ mediaSource,
seekerRef,
waveformCanvasRef,
hasTtl ? reversePlayProgress : playProgress,
@@ -553,7 +556,7 @@ function renderAudio(
}
function renderVoice(
- voice: ApiVoice,
+ media: ApiVoice | ApiVideo,
seekerRef: React.Ref,
waveformCanvasRef: React.Ref,
playProgress: number,
@@ -604,7 +607,7 @@ function renderVoice(
stroke-linejoin="round"
rx="6"
ry="6"
- stroke="var(--accent-color)"
+ stroke="white"
stroke-dashoffset="1"
stroke-dasharray="32,68"
/>
@@ -618,7 +621,7 @@ function renderVoice(
dir="auto"
>
{playProgress === 0 || playProgress === 1
- ? formatMediaDuration(voice.duration) : formatMediaDuration(voice.duration * playProgress)}
+ ? formatMediaDuration(media!.duration) : formatMediaDuration(media!.duration * playProgress)}
);
@@ -626,7 +629,7 @@ function renderVoice(
function useWaveformCanvas(
theme: ISettings['theme'],
- voice?: ApiVoice,
+ media?: ApiVoice | ApiVideo,
playProgress = 0,
isOwn = false,
withAvatar = false,
@@ -637,11 +640,11 @@ function useWaveformCanvas(
const canvasRef = useRef(null);
const { data: spikes, peak } = useMemo(() => {
- if (!voice) {
+ if (!media) {
return undefined;
}
- const { waveform, duration } = voice;
+ const { waveform, duration } = media;
if (!waveform) {
return {
data: new Array(Math.min(duration, MAX_EMPTY_WAVEFORM_POINTS)).fill(0),
@@ -655,7 +658,7 @@ function useWaveformCanvas(
const decodedWaveform = decodeWaveform(new Uint8Array(waveform));
return interpolateArray(decodedWaveform, spikesCount);
- }, [isMobile, voice, withAvatar]) || {};
+ }, [isMobile, media, withAvatar]) || {};
useLayoutEffect(() => {
const canvas = canvasRef.current;
diff --git a/src/components/middle/message/Message.tsx b/src/components/middle/message/Message.tsx
index 9cf7c0806..8131ec871 100644
--- a/src/components/middle/message/Message.tsx
+++ b/src/components/middle/message/Message.tsx
@@ -115,7 +115,6 @@ import {
calculateDimensionsForMessageMedia,
getStickerDimensions,
REM,
- ROUND_VIDEO_DIMENSIONS_PX,
} from '../../common/helpers/mediaDimensions';
import { getPeerColorClass } from '../../common/helpers/peerColor';
import renderText from '../../common/helpers/renderText';
@@ -520,9 +519,11 @@ const Message: FC = ({
const messageReplyInfo = getMessageReplyInfo(message);
const storyReplyInfo = getStoryReplyInfo(message);
+ const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText));
+
const hasStoryReply = Boolean(storyReplyInfo);
const hasThread = Boolean(repliesThreadInfo) && messageListType === 'thread';
- const isCustomShape = getMessageCustomShape(message);
+ const isCustomShape = !withVoiceTranscription && getMessageCustomShape(message);
const hasAnimatedEmoji = isCustomShape && (animatedEmoji || animatedCustomEmoji);
const hasReactions = reactionMessage?.reactions && !areReactionsEmpty(reactionMessage.reactions);
const asForwarded = (
@@ -558,8 +559,6 @@ const Message: FC = ({
&& forwardInfo.fromMessageId
));
- const noUserColors = isOwn && !isCustomShape;
-
const hasFactCheck = Boolean(factCheck?.text);
const hasForwardedCustomShape = asForwarded && isCustomShape;
@@ -575,7 +574,8 @@ const Message: FC = ({
});
const messageSender = canShowSender ? sender : undefined;
- const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText));
+
+ const noUserColors = isOwn && !isCustomShape;
const shouldPreferOriginSender = forwardInfo
&& (isChatWithSelf || isRepliesChat || isAnonymousForwards || !messageSender);
@@ -760,7 +760,7 @@ const Message: FC = ({
const withQuickReactionButton = !isTouchScreen && !phoneCall && !isInSelectMode && defaultReaction
&& !isInDocumentGroupNotLast && !isStoryMention && !hasTtl;
- const hasOutsideReactions = hasReactions
+ const hasOutsideReactions = !withVoiceTranscription && hasReactions
&& (isCustomShape || ((photo || video || storyData || (location?.mediaType === 'geo')) && !hasText));
const contentClassName = buildContentClassName(message, album, {
@@ -904,20 +904,11 @@ const Message: FC = ({
if (!isAlbum && (photo || video || invoice?.extendedMedia)) {
let width: number | undefined;
- if (photo) {
- width = calculateMediaDimensions({
- media: photo,
- isOwn,
- asForwarded,
- noAvatars,
- isMobile,
- }).width;
- } else if (video) {
- if (isRoundVideo) {
- width = ROUND_VIDEO_DIMENSIONS_PX;
- } else {
+ if (photo || video) {
+ const media = (photo || video);
+ if (media && !isRoundVideo) {
width = calculateMediaDimensions({
- media: video,
+ media,
isOwn,
asForwarded,
noAvatars,
@@ -1193,16 +1184,22 @@ const Message: FC = ({
chatId={chatId}
/>
)}
- {!isAlbum && isRoundVideo && (
+ {!isAlbum && isRoundVideo && !withVoiceTranscription && (
)}
- {(audio || voice) && (
+ {(audio || voice || withVoiceTranscription) && (