Message: Voice-To-Text for Video Message (#5201)

This commit is contained in:
Alexander Zinchuk 2024-11-27 20:33:48 +04:00
parent c91287635e
commit a1923f203c
10 changed files with 154 additions and 41 deletions

View File

@ -28,6 +28,7 @@ import type {
import type { UniversalMessage } from './messages';
import { SUPPORTED_PHOTO_CONTENT_TYPES, SUPPORTED_VIDEO_CONTENT_TYPES, VIDEO_WEBM_TYPE } from '../../../config';
import { generateWaveform } from '../../../util/generateWaveform';
import { pick } from '../../../util/iteratees';
import {
addMediaToLocalDb, addStoryToLocalDb, type MediaRepairContext, serializeBytes,
@ -217,6 +218,8 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo
nosound,
} = videoAttr;
const waveform = isRound ? generateWaveform(duration) : undefined;
return {
mediaType: 'video',
id: String(id),
@ -233,6 +236,7 @@ export function buildVideoFromDocument(document: GramJs.Document, isSpoiler?: bo
isSpoiler,
hasVideoPreview,
previewPhotoSizes,
waveform,
...(nosound && { noSound: true }),
};
}

View File

@ -116,6 +116,7 @@ export interface ApiVideo {
previewBlobUrl?: string;
size: number;
noSound?: boolean;
waveform?: number[];
}
export interface ApiAudio {

View File

@ -4,7 +4,9 @@ import React, {
} from '../../lib/teact/teact';
import { getActions } from '../../global';
import type { ApiAudio, ApiMessage, ApiVoice } from '../../api/types';
import type {
ApiAudio, ApiMessage, ApiVideo, ApiVoice,
} from '../../api/types';
import type { BufferedRange } from '../../hooks/useBuffering';
import type { OldLangFn } from '../../hooks/useOldLang';
import type { ISettings } from '../../types';
@ -118,6 +120,7 @@ const Audio: FC<OwnProps> = ({
} = message;
const audio = contentAudio || getMessageWebPageAudio(message);
const media = (voice || video || audio)!;
const mediaSource = (voice || video);
const isVoice = Boolean(voice || video);
const isSeeking = useRef<boolean>(false);
// eslint-disable-next-line no-null/no-null
@ -186,7 +189,7 @@ const Audio: FC<OwnProps> = ({
const waveformCanvasRef = useWaveformCanvas(
theme,
voice,
mediaSource,
(isMediaUnread && !isOwn && !isReverse) ? 1 : playProgress,
isOwn,
!noAvatars,
@ -462,10 +465,10 @@ const Audio: FC<OwnProps> = ({
transferProgress,
onDateClick ? handleDateClick : undefined,
)}
{origin === AudioOrigin.SharedMedia && (voice || video) && renderWithTitle()}
{(origin === AudioOrigin.Inline || isInOneTimeModal) && voice && (
{origin === AudioOrigin.SharedMedia && mediaSource && renderWithTitle()}
{(origin === AudioOrigin.Inline || isInOneTimeModal || isTranscribed) && mediaSource && (
renderVoice(
voice,
mediaSource,
seekerRef,
waveformCanvasRef,
hasTtl ? reversePlayProgress : playProgress,
@ -553,7 +556,7 @@ function renderAudio(
}
function renderVoice(
voice: ApiVoice,
media: ApiVoice | ApiVideo,
seekerRef: React.Ref<HTMLDivElement>,
waveformCanvasRef: React.Ref<HTMLCanvasElement>,
playProgress: number,
@ -604,7 +607,7 @@ function renderVoice(
stroke-linejoin="round"
rx="6"
ry="6"
stroke="var(--accent-color)"
stroke="white"
stroke-dashoffset="1"
stroke-dasharray="32,68"
/>
@ -618,7 +621,7 @@ function renderVoice(
dir="auto"
>
{playProgress === 0 || playProgress === 1
? formatMediaDuration(voice.duration) : formatMediaDuration(voice.duration * playProgress)}
? formatMediaDuration(media!.duration) : formatMediaDuration(media!.duration * playProgress)}
</p>
</div>
);
@ -626,7 +629,7 @@ function renderVoice(
function useWaveformCanvas(
theme: ISettings['theme'],
voice?: ApiVoice,
media?: ApiVoice | ApiVideo,
playProgress = 0,
isOwn = false,
withAvatar = false,
@ -637,11 +640,11 @@ function useWaveformCanvas(
const canvasRef = useRef<HTMLCanvasElement>(null);
const { data: spikes, peak } = useMemo(() => {
if (!voice) {
if (!media) {
return undefined;
}
const { waveform, duration } = voice;
const { waveform, duration } = media;
if (!waveform) {
return {
data: new Array(Math.min(duration, MAX_EMPTY_WAVEFORM_POINTS)).fill(0),
@ -655,7 +658,7 @@ function useWaveformCanvas(
const decodedWaveform = decodeWaveform(new Uint8Array(waveform));
return interpolateArray(decodedWaveform, spikesCount);
}, [isMobile, voice, withAvatar]) || {};
}, [isMobile, media, withAvatar]) || {};
useLayoutEffect(() => {
const canvas = canvasRef.current;

View File

@ -115,7 +115,6 @@ import {
calculateDimensionsForMessageMedia,
getStickerDimensions,
REM,
ROUND_VIDEO_DIMENSIONS_PX,
} from '../../common/helpers/mediaDimensions';
import { getPeerColorClass } from '../../common/helpers/peerColor';
import renderText from '../../common/helpers/renderText';
@ -520,9 +519,11 @@ const Message: FC<OwnProps & StateProps> = ({
const messageReplyInfo = getMessageReplyInfo(message);
const storyReplyInfo = getStoryReplyInfo(message);
const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText));
const hasStoryReply = Boolean(storyReplyInfo);
const hasThread = Boolean(repliesThreadInfo) && messageListType === 'thread';
const isCustomShape = getMessageCustomShape(message);
const isCustomShape = !withVoiceTranscription && getMessageCustomShape(message);
const hasAnimatedEmoji = isCustomShape && (animatedEmoji || animatedCustomEmoji);
const hasReactions = reactionMessage?.reactions && !areReactionsEmpty(reactionMessage.reactions);
const asForwarded = (
@ -558,8 +559,6 @@ const Message: FC<OwnProps & StateProps> = ({
&& forwardInfo.fromMessageId
));
const noUserColors = isOwn && !isCustomShape;
const hasFactCheck = Boolean(factCheck?.text);
const hasForwardedCustomShape = asForwarded && isCustomShape;
@ -575,7 +574,8 @@ const Message: FC<OwnProps & StateProps> = ({
});
const messageSender = canShowSender ? sender : undefined;
const withVoiceTranscription = Boolean(!isTranscriptionHidden && (isTranscriptionError || transcribedText));
const noUserColors = isOwn && !isCustomShape;
const shouldPreferOriginSender = forwardInfo
&& (isChatWithSelf || isRepliesChat || isAnonymousForwards || !messageSender);
@ -760,7 +760,7 @@ const Message: FC<OwnProps & StateProps> = ({
const withQuickReactionButton = !isTouchScreen && !phoneCall && !isInSelectMode && defaultReaction
&& !isInDocumentGroupNotLast && !isStoryMention && !hasTtl;
const hasOutsideReactions = hasReactions
const hasOutsideReactions = !withVoiceTranscription && hasReactions
&& (isCustomShape || ((photo || video || storyData || (location?.mediaType === 'geo')) && !hasText));
const contentClassName = buildContentClassName(message, album, {
@ -904,20 +904,11 @@ const Message: FC<OwnProps & StateProps> = ({
if (!isAlbum && (photo || video || invoice?.extendedMedia)) {
let width: number | undefined;
if (photo) {
width = calculateMediaDimensions({
media: photo,
isOwn,
asForwarded,
noAvatars,
isMobile,
}).width;
} else if (video) {
if (isRoundVideo) {
width = ROUND_VIDEO_DIMENSIONS_PX;
} else {
if (photo || video) {
const media = (photo || video);
if (media && !isRoundVideo) {
width = calculateMediaDimensions({
media: video,
media,
isOwn,
asForwarded,
noAvatars,
@ -1193,16 +1184,22 @@ const Message: FC<OwnProps & StateProps> = ({
chatId={chatId}
/>
)}
{!isAlbum && isRoundVideo && (
{!isAlbum && isRoundVideo && !withVoiceTranscription && (
<RoundVideo
message={message}
observeIntersection={observeIntersectionForLoading}
canAutoLoad={canAutoLoadMedia}
isDownloading={isDownloading}
onReadMedia={shouldReadMedia ? handleReadMedia : undefined}
onHideTranscription={setTranscriptionHidden}
isTranscriptionError={isTranscriptionError}
isTranscribed={Boolean(transcribedText)}
canTranscribe={canTranscribeVoice && !hasTtl}
isTranscriptionHidden={isTranscriptionHidden}
isTranscribing={isTranscribing}
/>
)}
{(audio || voice) && (
{(audio || voice || withVoiceTranscription) && (
<Audio
theme={theme}
message={message}

View File

@ -102,7 +102,7 @@
right: 0.25rem;
left: auto;
height: 1.125rem;
padding: 0 0.3125rem 0 0.375rem;
padding: 0 0.3125rem 0 0.6875rem;
.MessageOutgoingStatus .icon {
background: transparent;

View File

@ -67,4 +67,31 @@
z-index: var(--z-badge);
}
}
.loading-svg {
position: absolute;
top: 0;
left: 0;
border-radius: 0.5rem;
width: 1.875rem;
height: 1.3125rem;
}
.loading-rect {
animation: 1s linear loader-rectangle infinite;
}
.transcribe-button {
position: absolute;
width: 1.875rem;
height: 1.3125rem;
bottom: 1.625rem;
right: 0.1875rem;
border-radius: 0.5rem;
background: var(--pattern-color);
&:hover {
background: var(--pattern-color) !important;
opacity: 0.8;
}
}
}

View File

@ -1,6 +1,7 @@
import type { FC } from '../../../lib/teact/teact';
import React, {
useEffect, useLayoutEffect, useRef, useSignal, useState,
useEffect, useLayoutEffect,
useRef, useSignal, useState,
} from '../../../lib/teact/teact';
import { getActions } from '../../../global';
@ -47,6 +48,12 @@ type OwnProps = {
observeIntersection?: ObserveFn;
onStop?: NoneToVoidFunction;
onReadMedia?: NoneToVoidFunction;
onHideTranscription?: (isHidden: boolean) => void;
isTranscriptionError?: boolean;
canTranscribe?: boolean;
isTranscribed?: boolean;
isTranscriptionHidden?: boolean;
isTranscribing?: boolean;
};
const PROGRESS_CENTER = ROUND_VIDEO_DIMENSIONS_PX / 2;
@ -65,6 +72,12 @@ const RoundVideo: FC<OwnProps> = ({
observeIntersection,
onStop,
onReadMedia,
isTranscriptionError,
isTranscribed,
canTranscribe,
onHideTranscription,
isTranscriptionHidden,
isTranscribing,
}) => {
// eslint-disable-next-line no-null/no-null
const ref = useRef<HTMLDivElement>(null);
@ -73,12 +86,12 @@ const RoundVideo: FC<OwnProps> = ({
// eslint-disable-next-line no-null/no-null
const circleRef = useRef<SVGCircleElement>(null);
const video = message.content.video!;
const { cancelMediaDownload, openOneTimeMediaModal } = getActions();
const { cancelMediaDownload, openOneTimeMediaModal, transcribeAudio } = getActions();
const isIntersecting = useIsIntersecting(ref, observeIntersection);
const video = message.content.video!;
const [isLoadAllowed, setIsLoadAllowed] = useState(canAutoLoad);
const shouldLoad = Boolean(isLoadAllowed && isIntersecting);
const { mediaData, loadProgress } = useMediaWithLoadProgress(
@ -181,7 +194,11 @@ const RoundVideo: FC<OwnProps> = ({
togglePlaying();
}, [isInOneTimeModal]);
const handleClick = useLastCallback(() => {
const handleClick = useLastCallback((event) => {
if (event.target.closest('.transcribe-button')) {
return;
}
if (!mediaData) {
setIsLoadAllowed((isAllowed) => !isAllowed);
@ -207,6 +224,10 @@ const RoundVideo: FC<OwnProps> = ({
setProgress(playerEl.currentTime / playerEl.duration);
});
const handleTranscribe = useLastCallback(() => {
transcribeAudio({ chatId: message.chatId, messageId: message.id });
});
function renderPlayWrapper() {
return (
<div className="play-wrapper">
@ -224,6 +245,14 @@ const RoundVideo: FC<OwnProps> = ({
);
}
const handleButtonClick = useLastCallback(() => {
if ((isTranscribed || isTranscriptionError) && onHideTranscription) {
onHideTranscription(!isTranscriptionHidden);
} else if (!isTranscribing) {
handleTranscribe();
}
});
return (
<div
ref={ref}
@ -298,6 +327,31 @@ const RoundVideo: FC<OwnProps> = ({
{(!isActivated || playerRef.current!.paused) && <Icon name="muted" />}
</div>
)}
{canTranscribe && (
<Button
onClick={handleButtonClick}
className="transcribe-button"
>
{isTranscribed || isTranscriptionError ? <Icon name="down" /> : <Icon name="transcribe" />}
{isTranscribing && (
<svg viewBox="0 0 32 24" className="loading-svg">
<rect
className="loading-rect"
fill="transparent"
width="32"
height="24"
stroke-width="3"
stroke-linejoin="round"
rx="6"
ry="6"
stroke="white"
stroke-dashoffset="1"
stroke-dasharray="32,68"
/>
</svg>
)}
</Button>
)}
</div>
);
};

View File

@ -930,6 +930,12 @@
.Message:not(.own) & {
--hover-color: var(--color-reply-hover);
--active-color: var(--color-reply-active);
&.has-subheader {
.MessageMeta {
right: 5.6875rem;
}
}
}
.Message.own & {
@ -949,9 +955,20 @@
}
}
}
&.has-subheader {
.RoundVideo .transcribe-button {
right: 0.8125rem;
}
}
}
}
.message-content.voice,
.message-content.video {
width: min-content;
}
.forwarded-message {
.message-content.contact &,
.message-content.poll &,

View File

@ -98,6 +98,7 @@ export function buildContentClassName(
if (isCustomShape) {
classNames.push('custom-shape');
if (isRoundVideo) {
classNames.push('round');
}
@ -106,8 +107,10 @@ export function buildContentClassName(
classNames.push('has-comment-counter');
}
}
if (isMedia) {
if (isMedia && !withVoiceTranscription) {
classNames.push('media');
} else if (video) {
classNames.push('video');
} else if (audio) {
classNames.push('audio');
} else if (voice) {
@ -178,7 +181,8 @@ export function buildContentClassName(
classNames.push('has-background');
}
if (hasSubheader || asForwarded || isViaBot || !isMediaWithNoText || forceSenderName || hasFactCheck) {
if (hasSubheader || asForwarded || isViaBot || !isMediaWithNoText
|| forceSenderName || hasFactCheck || withVoiceTranscription) {
classNames.push('has-solid-background');
}

View File

@ -0,0 +1,6 @@
import { MAX_EMPTY_WAVEFORM_POINTS } from '../components/common/helpers/waveform';
/**
 * Produces a placeholder waveform made of random samples.
 *
 * The values are not derived from any audio data: every entry is an
 * independent random integer in the range 0..255, so two calls with the same
 * `duration` return different arrays of the same length.
 *
 * @param duration Media duration; it is rounded to the nearest integer and
 *   used as the number of samples, capped at `MAX_EMPTY_WAVEFORM_POINTS`.
 * @returns An array of random integers in [0, 255]. It is empty when the
 *   rounded duration is zero, negative or NaN.
 */
export function generateWaveform(duration: number) {
  const sampleCount = Math.min(MAX_EMPTY_WAVEFORM_POINTS, Math.round(duration));
  const randomSample = () => Math.floor(Math.random() * 256);

  // `Array.from` clamps a negative or NaN length to 0, so no explicit guard is needed.
  return Array.from({ length: sampleCount }, randomSample);
}