webui: support video files as input (#22830)
This commit is contained in:
Vendored
+2
@@ -39,6 +39,7 @@ import type {
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
DatabaseMessageExtraAudioFile,
|
||||
DatabaseMessageExtraVideoFile,
|
||||
DatabaseMessageExtraImageFile,
|
||||
DatabaseMessageExtraTextFile,
|
||||
DatabaseMessageExtraPdfFile,
|
||||
@@ -102,6 +103,7 @@ declare global {
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
DatabaseMessageExtraAudioFile,
|
||||
DatabaseMessageExtraVideoFile,
|
||||
DatabaseMessageExtraImageFile,
|
||||
DatabaseMessageExtraTextFile,
|
||||
DatabaseMessageExtraPdfFile,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { Eye, Mic } from '@lucide/svelte';
|
||||
import { Eye, Mic, Video } from '@lucide/svelte';
|
||||
import { ModelModality } from '$lib/enums';
|
||||
|
||||
interface Props {
|
||||
@@ -11,7 +11,7 @@
|
||||
</script>
|
||||
|
||||
{#each modalities as modality (modality)}
|
||||
{#if modality === ModelModality.VISION || modality === ModelModality.AUDIO}
|
||||
{#if modality === ModelModality.VISION || modality === ModelModality.AUDIO || modality === ModelModality.VIDEO}
|
||||
<span
|
||||
class={[
|
||||
'inline-flex items-center gap-1 rounded-md bg-muted px-2 py-1 text-xs font-medium',
|
||||
@@ -21,7 +21,11 @@
|
||||
{#if modality === ModelModality.VISION}
|
||||
<Eye class="h-3 w-3" />
|
||||
|
||||
Vision
|
||||
Vision (Image)
|
||||
{:else if modality === ModelModality.VIDEO}
|
||||
<Video class="h-3 w-3" />
|
||||
|
||||
Vision (Video)
|
||||
{:else}
|
||||
<Mic class="h-3 w-3" />
|
||||
|
||||
|
||||
+11
-1
@@ -1,10 +1,12 @@
|
||||
<script lang="ts">
|
||||
import { X } from '@lucide/svelte';
|
||||
import { X, Music, Video } from '@lucide/svelte';
|
||||
import {
|
||||
formatFileSize,
|
||||
getFileTypeLabel,
|
||||
getPreviewText,
|
||||
isPdfFile,
|
||||
isAudioFile,
|
||||
isVideoFile,
|
||||
isTextFile
|
||||
} from '$lib/utils';
|
||||
import { ActionIcon } from '$lib/components/app';
|
||||
@@ -38,6 +40,8 @@
|
||||
}: Props = $props();
|
||||
|
||||
let isPdf = $derived(isPdfFile(attachment, uploadedFile));
|
||||
let isAudio = $derived(isAudioFile(attachment, uploadedFile));
|
||||
let isVideo = $derived(isVideoFile(attachment, uploadedFile));
|
||||
let isPdfWithContent = $derived(isPdf && !!textContent);
|
||||
|
||||
let isText = $derived(isTextFile(attachment, uploadedFile));
|
||||
@@ -102,7 +106,13 @@
|
||||
<div
|
||||
class="flex h-8 w-8 items-center justify-center rounded bg-primary/10 text-xs font-medium text-primary"
|
||||
>
|
||||
{#if isAudio}
|
||||
<Music class="h-4 w-4 text-white/70" />
|
||||
{:else if isVideo}
|
||||
<Video class="h-4 w-4 text-white/70" />
|
||||
{:else}
|
||||
{fileTypeLabel}
|
||||
{/if}
|
||||
</div>
|
||||
{/snippet}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
getAttachmentDisplayItems,
|
||||
getLanguageFromFilename,
|
||||
isAudioFile,
|
||||
isVideoFile,
|
||||
isImageFile,
|
||||
isMcpPrompt,
|
||||
isMcpResource,
|
||||
@@ -29,6 +30,7 @@
|
||||
textContent?: string;
|
||||
isImage: boolean;
|
||||
isAudio: boolean;
|
||||
isVideo: boolean;
|
||||
}
|
||||
|
||||
interface Props {
|
||||
@@ -54,7 +56,8 @@
|
||||
(item): PreviewItem => ({
|
||||
...item,
|
||||
isImage: isImageFile(item.attachment, item.uploadedFile),
|
||||
isAudio: isAudioFile(item.attachment, item.uploadedFile)
|
||||
isAudio: isAudioFile(item.attachment, item.uploadedFile),
|
||||
isVideo: isVideoFile(item.attachment, item.uploadedFile)
|
||||
})
|
||||
)
|
||||
);
|
||||
@@ -102,6 +105,9 @@
|
||||
let isAudio = $derived(
|
||||
currentItem ? isAudioFile(currentItem.attachment, currentItem.uploadedFile) : false
|
||||
);
|
||||
let isVideo = $derived(
|
||||
currentItem ? isVideoFile(currentItem.attachment, currentItem.uploadedFile) : false
|
||||
);
|
||||
let isImage = $derived(
|
||||
currentItem ? isImageFile(currentItem.attachment, currentItem.uploadedFile) : false
|
||||
);
|
||||
@@ -148,6 +154,20 @@
|
||||
: null
|
||||
);
|
||||
|
||||
// Source URL for the video player.
// - `null` unless the current item is a video.
// - Uses the uploaded file's `preview` when it is not null/undefined.
// - Otherwise builds a data URL from a stored attachment that carries both
//   `mimeType` and `base64Data`; `null` if the attachment lacks either.
let videoSrc = $derived(
	isVideo && currentItem
		? (currentItem.uploadedFile?.preview ??
			(currentItem.attachment &&
			'mimeType' in currentItem.attachment &&
			'base64Data' in currentItem.attachment
				? createBase64DataUrl(
						currentItem.attachment.mimeType,
						currentItem.attachment.base64Data
					)
				: null))
		: null
);
|
||||
|
||||
export function prev() {
|
||||
currentIndex = currentIndex > 0 ? currentIndex - 1 : allItems.length - 1;
|
||||
}
|
||||
@@ -173,11 +193,13 @@
|
||||
{currentItem}
|
||||
{isImage}
|
||||
{isAudio}
|
||||
{isVideo}
|
||||
{isPdf}
|
||||
{isText}
|
||||
{displayPreview}
|
||||
{displayTextContent}
|
||||
{audioSrc}
|
||||
{videoSrc}
|
||||
{language}
|
||||
{hasVisionModality}
|
||||
{activeModelId}
|
||||
|
||||
+12
-3
@@ -1,9 +1,10 @@
|
||||
<script lang="ts">
|
||||
import type { ChatAttachmentDisplayItem } from '$lib/types';
|
||||
import { Image, Music, FileText, FileIcon } from '@lucide/svelte';
|
||||
import { Image, Music, Video, FileText, FileIcon } from '@lucide/svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemPdf from './ChatAttachmentsPreviewCurrentItemPdf.svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemImage from './ChatAttachmentsPreviewCurrentItemImage.svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemAudio from './ChatAttachmentsPreviewCurrentItemAudio.svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemVideo from './ChatAttachmentsPreviewCurrentItemVideo.svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemText from './ChatAttachmentsPreviewCurrentItemText.svelte';
|
||||
import ChatAttachmentsPreviewCurrentItemUnavailable from './ChatAttachmentsPreviewCurrentItemUnavailable.svelte';
|
||||
|
||||
@@ -11,11 +12,13 @@
|
||||
currentItem: ChatAttachmentDisplayItem | null;
|
||||
isImage: boolean;
|
||||
isAudio: boolean;
|
||||
isVideo: boolean;
|
||||
isPdf: boolean;
|
||||
isText: boolean;
|
||||
displayPreview: string | undefined;
|
||||
displayTextContent: string | undefined;
|
||||
audioSrc: string | null;
|
||||
videoSrc: string | null;
|
||||
language: string;
|
||||
hasVisionModality: boolean;
|
||||
activeModelId?: string;
|
||||
@@ -25,21 +28,25 @@
|
||||
currentItem,
|
||||
isImage,
|
||||
isAudio,
|
||||
isVideo,
|
||||
isPdf,
|
||||
isText,
|
||||
displayPreview,
|
||||
displayTextContent,
|
||||
audioSrc,
|
||||
videoSrc,
|
||||
language,
|
||||
hasVisionModality,
|
||||
activeModelId
|
||||
}: Props = $props();
|
||||
|
||||
// Icon handed to the "unavailable" placeholder, chosen by attachment kind.
// Precedence: image, then text/PDF, then audio, then video, else the generic file icon.
let IconComponent = $derived(
	isImage ? Image : isText || isPdf ? FileText : isAudio ? Music : isVideo ? Video : FileIcon
);

// True when none of the dedicated previews applies: not a PDF, image, audio or
// video item, and not a text item that actually has text content to show.
let isUnavailable = $derived(
	!isPdf && !isImage && !(isText && displayTextContent) && !isAudio && !isVideo
);
|
||||
</script>
|
||||
|
||||
{#if currentItem}
|
||||
@@ -58,6 +65,8 @@
|
||||
<ChatAttachmentsPreviewCurrentItemText {displayTextContent} {language} />
|
||||
{:else if isAudio}
|
||||
<ChatAttachmentsPreviewCurrentItemAudio {currentItem} {audioSrc} />
|
||||
{:else if isVideo}
|
||||
<ChatAttachmentsPreviewCurrentItemVideo {currentItem} {videoSrc} />
|
||||
{:else if isUnavailable}
|
||||
<ChatAttachmentsPreviewCurrentItemUnavailable {IconComponent} />
|
||||
{/if}
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
<!--
	@component
	Preview pane for a video attachment.

	Renders a native `<video>` player when `videoSrc` is set, otherwise a
	"preview not available" message. The item's name (or the literal "Video"
	when the name is missing or empty) is shown underneath.
-->
<script lang="ts">
	import { Video } from '@lucide/svelte';

	interface Props {
		/** Item being previewed; only its optional `name` is read, for the caption line. */
		currentItem: { name?: string } | null;
		/** URL bound to the player's `src`; `null` (or empty) shows the fallback message. */
		videoSrc: string | null;
	}

	let { currentItem, videoSrc }: Props = $props();
</script>

<div class="flex flex-1 items-center justify-center p-8">
	<div class="w-full max-w-md text-center">
		<Video class="mx-auto mb-4 h-16 w-16 text-white/50" />

		{#if videoSrc}
			<!-- svelte-ignore a11y_media_has_caption (no caption track is available for attached clips) -->
			<video controls class="mb-4 w-full" src={videoSrc}>
				Your browser does not support the video element.
			</video>
		{:else}
			<p class="mb-4 text-white/70">Video preview not available</p>
		{/if}

		<p class="text-sm text-white/50">{currentItem?.name || 'Video'}</p>
	</div>
</div>
|
||||
+4
-1
@@ -1,5 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { Music, FileText } from '@lucide/svelte';
|
||||
import { Music, Video, FileText } from '@lucide/svelte';
|
||||
import { HorizontalScrollCarousel } from '$lib/components/app/misc';
|
||||
|
||||
interface PreviewItem {
|
||||
@@ -7,6 +7,7 @@
|
||||
name: string;
|
||||
isImage: boolean;
|
||||
isAudio: boolean;
|
||||
isVideo: boolean;
|
||||
preview?: string;
|
||||
}
|
||||
|
||||
@@ -49,6 +50,8 @@
|
||||
>
|
||||
{#if item.isAudio}
|
||||
<Music class="h-4 w-4 text-white/70" />
|
||||
{:else if item.isVideo}
|
||||
<Video class="h-4 w-4 text-white/70" />
|
||||
{:else}
|
||||
<FileText class="h-4 w-4 text-white/70" />
|
||||
{/if}
|
||||
|
||||
+3
@@ -23,6 +23,7 @@
|
||||
class?: string;
|
||||
disabled?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
hasVideoModality?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
hasMcpPromptsSupport?: boolean;
|
||||
hasMcpResourcesSupport?: boolean;
|
||||
@@ -37,6 +38,7 @@
|
||||
class: className = '',
|
||||
disabled = false,
|
||||
hasAudioModality = false,
|
||||
hasVideoModality = false,
|
||||
hasVisionModality = false,
|
||||
hasMcpPromptsSupport = false,
|
||||
hasMcpResourcesSupport = false,
|
||||
@@ -58,6 +60,7 @@
|
||||
() => ({
|
||||
hasVisionModality,
|
||||
hasAudioModality,
|
||||
hasVideoModality,
|
||||
hasMcpPromptsSupport,
|
||||
hasMcpResourcesSupport
|
||||
}),
|
||||
|
||||
+3
@@ -19,6 +19,7 @@
|
||||
class?: string;
|
||||
disabled?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
hasVideoModality?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
hasMcpPromptsSupport?: boolean;
|
||||
hasMcpResourcesSupport?: boolean;
|
||||
@@ -34,6 +35,7 @@
|
||||
disabled = false,
|
||||
hasAudioModality = false,
|
||||
hasVisionModality = false,
|
||||
hasVideoModality = false,
|
||||
hasMcpPromptsSupport = false,
|
||||
hasMcpResourcesSupport = false,
|
||||
onFileUpload,
|
||||
@@ -49,6 +51,7 @@
|
||||
() => ({
|
||||
hasVisionModality,
|
||||
hasAudioModality,
|
||||
hasVideoModality,
|
||||
hasMcpPromptsSupport,
|
||||
hasMcpResourcesSupport
|
||||
}),
|
||||
|
||||
+4
@@ -7,6 +7,7 @@
|
||||
interface Props {
|
||||
disabled?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
hasVideoModality?: boolean;
|
||||
hasMcpPromptsSupport?: boolean;
|
||||
hasMcpResourcesSupport?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
@@ -20,6 +21,7 @@
|
||||
let {
|
||||
disabled = false,
|
||||
hasAudioModality = false,
|
||||
hasVideoModality = false,
|
||||
hasMcpPromptsSupport = false,
|
||||
hasMcpResourcesSupport = false,
|
||||
hasVisionModality = false,
|
||||
@@ -37,6 +39,7 @@
|
||||
<ChatFormActionAddSheet
|
||||
{disabled}
|
||||
{hasAudioModality}
|
||||
{hasVideoModality}
|
||||
{hasVisionModality}
|
||||
{hasMcpPromptsSupport}
|
||||
{hasMcpResourcesSupport}
|
||||
@@ -52,6 +55,7 @@
|
||||
<ChatFormActionAddDropdown
|
||||
{disabled}
|
||||
{hasAudioModality}
|
||||
{hasVideoModality}
|
||||
{hasVisionModality}
|
||||
{hasMcpPromptsSupport}
|
||||
{hasMcpResourcesSupport}
|
||||
|
||||
+6
@@ -11,6 +11,7 @@
|
||||
disabled?: boolean;
|
||||
forceForegroundText?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
hasVideoModality?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
hasModelSelected?: boolean;
|
||||
isSelectedModelInCache?: boolean;
|
||||
@@ -23,6 +24,7 @@
|
||||
disabled = false,
|
||||
forceForegroundText = false,
|
||||
hasAudioModality = $bindable(false),
|
||||
hasVideoModality = $bindable(false),
|
||||
hasVisionModality = $bindable(false),
|
||||
hasModelSelected = $bindable(false),
|
||||
isSelectedModelInCache = $bindable(true),
|
||||
@@ -95,6 +97,10 @@
|
||||
hasAudioModality = activeModelId ? modelsStore.modelSupportsAudio(activeModelId) : false;
|
||||
});
|
||||
|
||||
// Keeps the bindable `hasVideoModality` prop in step with the active model:
// the store's answer for `activeModelId`, or false when no model is active.
$effect(() => {
	hasVideoModality = activeModelId ? modelsStore.modelSupportsVideo(activeModelId) : false;
});
|
||||
|
||||
$effect(() => {
|
||||
void modelPropsVersion;
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@
|
||||
});
|
||||
|
||||
let hasAudioModality = $state(false);
|
||||
let hasVideoModality = $state(false);
|
||||
let hasVisionModality = $state(false);
|
||||
let hasModelSelected = $state(false);
|
||||
let isSelectedModelInCache = $state(true);
|
||||
@@ -94,6 +95,7 @@
|
||||
<ChatFormActionsAdd
|
||||
{disabled}
|
||||
{hasAudioModality}
|
||||
{hasVideoModality}
|
||||
{hasVisionModality}
|
||||
{hasMcpPromptsSupport}
|
||||
{hasMcpResourcesSupport}
|
||||
@@ -111,6 +113,7 @@
|
||||
{disabled}
|
||||
bind:this={selectorModelRef}
|
||||
bind:hasAudioModality
|
||||
bind:hasVideoModality
|
||||
bind:hasVisionModality
|
||||
bind:hasModelSelected
|
||||
bind:isSelectedModelInCache
|
||||
|
||||
@@ -144,6 +144,16 @@
|
||||
return false;
|
||||
});
|
||||
|
||||
// Whether the active model accepts video input; false when no model is active.
let hasVideoModality = $derived.by(() => {
	if (activeModelId) {
		// Value is deliberately discarded. Presumably this read registers
		// `modelPropsVersion` as a reactive dependency so the result is
		// recomputed when model props change — TODO confirm.
		void modelPropsVersion;

		return modelsStore.modelSupportsVideo(activeModelId);
	}

	return false;
});
|
||||
|
||||
let hasVisionModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion;
|
||||
@@ -284,7 +294,11 @@
|
||||
}
|
||||
|
||||
// Use model-specific capabilities for file validation
|
||||
const capabilities = { hasVision: hasVisionModality, hasAudio: hasAudioModality };
|
||||
const capabilities = {
|
||||
hasVision: hasVisionModality,
|
||||
hasAudio: hasAudioModality,
|
||||
hasVideo: hasVideoModality
|
||||
};
|
||||
const { supportedFiles, unsupportedFiles, modalityReasons } = filterFilesByModalities(
|
||||
generallySupported,
|
||||
capabilities
|
||||
@@ -297,6 +311,7 @@
|
||||
|
||||
if (hasVisionModality) supportedTypes.push('images');
|
||||
if (hasAudioModality) supportedTypes.push('audio files');
|
||||
if (hasVideoModality) supportedTypes.push('video files');
|
||||
|
||||
fileErrorData = {
|
||||
generallyUnsupported,
|
||||
|
||||
@@ -52,6 +52,15 @@ export const ATTACHMENT_FILE_ITEMS: AttachmentMenuItem[] = [
|
||||
disabledTooltip: 'Audio files processing requires an audio model',
|
||||
action: AttachmentAction.FILE_UPLOAD
|
||||
},
|
||||
{
|
||||
id: AttachmentMenuItemId.VIDEO,
|
||||
label: 'Video Files',
|
||||
icon: FILE_TYPE_ICONS.video,
|
||||
class: 'video-button',
|
||||
enabledWhen: AttachmentItemEnabledWhen.HAS_VIDEO_MODALITY,
|
||||
disabledTooltip: 'Video files processing requires a video model',
|
||||
action: AttachmentAction.FILE_UPLOAD
|
||||
},
|
||||
{
|
||||
id: AttachmentMenuItemId.TEXT,
|
||||
label: 'Text Files',
|
||||
|
||||
@@ -8,13 +8,15 @@ import {
|
||||
FileText as FileTextIcon,
|
||||
Image as ImageIcon,
|
||||
Eye as VisionIcon,
|
||||
Mic as AudioIcon
|
||||
Mic as AudioIcon,
|
||||
Video as VideoIcon
|
||||
} from '@lucide/svelte';
|
||||
import { FileTypeCategory, ModelModality } from '$lib/enums';
|
||||
|
||||
/** Icon component for each `FileTypeCategory`, keyed by the category's value. */
export const FILE_TYPE_ICONS = {
	[FileTypeCategory.IMAGE]: ImageIcon,
	[FileTypeCategory.AUDIO]: AudioIcon,
	[FileTypeCategory.VIDEO]: VideoIcon,
	[FileTypeCategory.TEXT]: FileTextIcon,
	[FileTypeCategory.PDF]: FileIcon
} as const;
|
||||
@@ -23,10 +25,12 @@ export const DEFAULT_FILE_ICON = FileIcon;
|
||||
|
||||
/** Icon component for each non-text model modality (vision, audio, video). */
export const MODALITY_ICONS = {
	[ModelModality.VISION]: VisionIcon,
	[ModelModality.AUDIO]: AudioIcon,
	[ModelModality.VIDEO]: VideoIcon
} as const;
|
||||
|
||||
/** Display label for each non-text model modality (vision, audio, video). */
export const MODALITY_LABELS = {
	[ModelModality.VISION]: 'Vision',
	[ModelModality.AUDIO]: 'Audio',
	[ModelModality.VIDEO]: 'Video'
} as const;
|
||||
|
||||
@@ -13,10 +13,12 @@ import {
|
||||
FileTypePdf,
|
||||
FileTypeText,
|
||||
MimeTypeAudio,
|
||||
MimeTypeVideo,
|
||||
MimeTypeImage,
|
||||
MimeTypeApplication,
|
||||
MimeTypeText
|
||||
} from '$lib/enums';
|
||||
import { FileExtensionVideo, FileTypeVideo } from '$lib/enums/files';
|
||||
|
||||
// File type configuration using enums
|
||||
export const AUDIO_FILE_TYPES = {
|
||||
@@ -30,6 +32,17 @@ export const AUDIO_FILE_TYPES = {
|
||||
}
|
||||
} as const;
|
||||
|
||||
/**
 * Recognised video formats: for each `FileTypeVideo` key, the file extensions
 * and MIME types that identify it.
 */
export const VIDEO_FILE_TYPES = {
	[FileTypeVideo.MP4]: {
		extensions: [FileExtensionVideo.MP4],
		mimeTypes: [MimeTypeVideo.MP4]
	},
	[FileTypeVideo.OGG]: {
		extensions: [FileExtensionVideo.OGG],
		mimeTypes: [MimeTypeVideo.OGG]
	}
} as const;
|
||||
|
||||
export const IMAGE_FILE_TYPES = {
|
||||
[FileTypeImage.JPEG]: {
|
||||
extensions: [FileExtensionImage.JPG, FileExtensionImage.JPEG],
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
export enum AttachmentType {
|
||||
AUDIO = 'AUDIO',
|
||||
IMAGE = 'IMAGE',
|
||||
VIDEO = 'VIDEO',
|
||||
MCP_PROMPT = 'MCP_PROMPT',
|
||||
MCP_RESOURCE = 'MCP_RESOURCE',
|
||||
PDF = 'PDF',
|
||||
@@ -18,6 +19,7 @@ export enum AttachmentType {
|
||||
export enum AttachmentMenuItemId {
|
||||
IMAGES = 'images',
|
||||
AUDIO = 'audio',
|
||||
VIDEO = 'video',
|
||||
TEXT = 'text',
|
||||
PDF = 'pdf',
|
||||
SYSTEM_MESSAGE = 'system-message',
|
||||
@@ -31,7 +33,8 @@ export enum AttachmentMenuItemId {
|
||||
/**
 * Condition under which an attachment menu item is enabled.
 * Apart from `ALWAYS`, each value is spelled exactly like the corresponding
 * modality flag name (`hasVisionModality`, `hasAudioModality`, `hasVideoModality`).
 */
export enum AttachmentItemEnabledWhen {
	ALWAYS = 'always',
	HAS_VISION_MODALITY = 'hasVisionModality',
	HAS_AUDIO_MODALITY = 'hasAudioModality',
	HAS_VIDEO_MODALITY = 'hasVideoModality'
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -39,7 +39,8 @@ export enum MessageType {
|
||||
/** `type` discriminator values for the parts of a multi-part chat message content array. */
export enum ContentPartType {
	TEXT = 'text',
	IMAGE_URL = 'image_url',
	INPUT_AUDIO = 'input_audio',
	INPUT_VIDEO = 'input_video'
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
/** Broad category a file is sorted into (image, audio, video, PDF or text). */
export enum FileTypeCategory {
	IMAGE = 'image',
	AUDIO = 'audio',
	VIDEO = 'video',
	PDF = 'pdf',
	TEXT = 'text'
}
|
||||
@@ -33,6 +34,11 @@ export enum FileTypeAudio {
|
||||
WEBM = 'webm'
|
||||
}
|
||||
|
||||
/** Identifiers of the supported video formats. */
export enum FileTypeVideo {
	MP4 = 'mp4',
	OGG = 'ogg'
}
|
||||
|
||||
export enum FileTypePdf {
|
||||
PDF = 'pdf'
|
||||
}
|
||||
@@ -92,6 +98,11 @@ export enum FileExtensionAudio {
|
||||
WAV = '.wav'
|
||||
}
|
||||
|
||||
/** File extensions (with the leading dot) of the supported video formats. */
export enum FileExtensionVideo {
	MP4 = '.mp4',
	OGG = '.ogg'
}
|
||||
|
||||
export enum FileExtensionPdf {
|
||||
PDF = '.pdf'
|
||||
}
|
||||
@@ -176,6 +187,11 @@ export enum MimeTypeAudio {
|
||||
WEBM_OPUS = 'audio/webm;codecs=opus'
|
||||
}
|
||||
|
||||
/** MIME types of the supported video formats. */
export enum MimeTypeVideo {
	MP4 = 'video/mp4',
	OGG = 'video/ogg'
}
|
||||
|
||||
export enum MimeTypeImage {
|
||||
JPEG = 'image/jpeg',
|
||||
JPG = 'image/jpg',
|
||||
|
||||
@@ -34,6 +34,7 @@ export {
|
||||
UriPattern,
|
||||
MimeTypeApplication,
|
||||
MimeTypeAudio,
|
||||
MimeTypeVideo,
|
||||
MimeTypeImage,
|
||||
MimeTypeText,
|
||||
SpecialFileType
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/** Input modalities a model can declare support for. */
export enum ModelModality {
	TEXT = 'TEXT',
	AUDIO = 'AUDIO',
	VISION = 'VISION',
	VIDEO = 'VIDEO'
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import { AttachmentAction } from '$lib/enums';
|
||||
/** Capability flags consulted when deciding which attachment menu items are enabled. */
export interface AttachmentModalityFlags {
	/** The active model accepts image input. */
	hasVisionModality: boolean;
	/** The active model accepts audio input. */
	hasAudioModality: boolean;
	/** The active model accepts video input. */
	hasVideoModality: boolean;
	/** MCP prompts are available as attachments. */
	hasMcpPromptsSupport: boolean;
	/** MCP resources are available as attachments. */
	hasMcpResourcesSupport: boolean;
}
|
||||
|
||||
@@ -884,6 +884,25 @@ export class ChatService {
|
||||
});
|
||||
}
|
||||
|
||||
const videoFiles = message.extra.filter(
|
||||
(extra: DatabaseMessageExtra): extra is DatabaseMessageExtraVideoFile =>
|
||||
extra.type === AttachmentType.VIDEO
|
||||
);
|
||||
|
||||
for (const video of videoFiles) {
|
||||
contentParts.push({
|
||||
type: ContentPartType.INPUT_VIDEO,
|
||||
input_video: {
|
||||
data: video.base64Data,
|
||||
format: video.mimeType.includes('mp4')
|
||||
? 'mp4'
|
||||
: video.mimeType.includes('ogg')
|
||||
? 'ogg'
|
||||
: 'auto'
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const pdfFiles = message.extra.filter(
|
||||
(extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
|
||||
extra.type === AttachmentType.PDF
|
||||
|
||||
@@ -148,7 +148,8 @@ class ModelsStore {
|
||||
if (props?.modalities) {
|
||||
return {
|
||||
vision: props.modalities.vision ?? false,
|
||||
audio: props.modalities.audio ?? false
|
||||
audio: props.modalities.audio ?? false,
|
||||
video: props.modalities.video ?? false
|
||||
};
|
||||
}
|
||||
|
||||
@@ -169,6 +170,13 @@ class ModelsStore {
|
||||
return this.getModelModalities(modelId)?.audio ?? false;
|
||||
}
|
||||
|
||||
/**
 * Check if a model supports video modality
 *
 * @param modelId - Identifier of the model to look up
 * @returns The model's `video` modality flag, or `false` when the model has no
 *   modalities entry or the flag is null/undefined
 */
modelSupportsVideo(modelId: string): boolean {
	return this.getModelModalities(modelId)?.video ?? false;
}
|
||||
|
||||
/**
|
||||
* Get model modalities as an array of ModelModality enum values
|
||||
*/
|
||||
@@ -180,6 +188,7 @@ class ModelsStore {
|
||||
|
||||
if (modalities.vision) result.push(ModelModality.VISION);
|
||||
if (modalities.audio) result.push(ModelModality.AUDIO);
|
||||
if (modalities.video) result.push(ModelModality.VIDEO);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -316,7 +325,8 @@ class ModelsStore {
|
||||
if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) {
|
||||
const modalities: ModelModalities = {
|
||||
vision: serverProps.modalities.vision ?? false,
|
||||
audio: serverProps.modalities.audio ?? false
|
||||
audio: serverProps.modalities.audio ?? false,
|
||||
video: serverProps.modalities.video ?? false
|
||||
};
|
||||
this.modelPropsCache.set(this.models[0].model, serverProps);
|
||||
this.models = this.models.map((model, index) =>
|
||||
@@ -410,7 +420,8 @@ class ModelsStore {
|
||||
|
||||
const modalities: ModelModalities = {
|
||||
vision: props.modalities.vision ?? false,
|
||||
audio: props.modalities.audio ?? false
|
||||
audio: props.modalities.audio ?? false,
|
||||
video: props.modalities.video ?? false
|
||||
};
|
||||
|
||||
return { ...model, modalities };
|
||||
@@ -529,7 +540,8 @@ class ModelsStore {
|
||||
|
||||
const modalities: ModelModalities = {
|
||||
vision: props.modalities.vision ?? false,
|
||||
audio: props.modalities.audio ?? false
|
||||
audio: props.modalities.audio ?? false,
|
||||
video: props.modalities.video ?? false
|
||||
};
|
||||
|
||||
this.models = this.models.map((model) =>
|
||||
|
||||
Vendored
+5
@@ -22,6 +22,10 @@ export interface ApiChatMessageContentPart {
|
||||
data: string;
|
||||
format: 'wav' | 'mp3';
|
||||
};
|
||||
input_video?: {
|
||||
data: string;
|
||||
format: 'mp4' | 'ogg' | 'auto';
|
||||
};
|
||||
}
|
||||
|
||||
export interface ApiContextSizeError {
|
||||
@@ -190,6 +194,7 @@ export interface ApiLlamaCppServerProps {
|
||||
modalities: {
|
||||
vision: boolean;
|
||||
audio: boolean;
|
||||
video: boolean;
|
||||
};
|
||||
chat_template: string;
|
||||
bos_token: string;
|
||||
|
||||
Vendored
+6
-1
@@ -64,4 +64,9 @@ export interface ParsedClipboardContent {
|
||||
mcpPromptAttachments: ClipboardMcpPromptAttachment[];
|
||||
}
|
||||
|
||||
/** Any MIME type value drawn from the audio, video, image, application or text enums. */
export type MimeTypeUnion =
	| MimeTypeAudio
	| MimeTypeVideo
	| MimeTypeImage
	| MimeTypeApplication
	| MimeTypeText;
|
||||
|
||||
Vendored
+9
@@ -23,6 +23,14 @@ export interface DatabaseMessageExtraAudioFile {
|
||||
mimeType: string;
|
||||
}
|
||||
|
||||
/** Video file stored as a message attachment. */
export interface DatabaseMessageExtraVideoFile {
	/** Discriminator identifying this extra as a video attachment. */
	type: AttachmentType.VIDEO;
	/** File name of the attachment. */
	name: string;
	/** File size, when known. */
	size?: number;
	/** File contents, base64-encoded. */
	base64Data: string;
	/** MIME type of the file, e.g. `video/mp4`. */
	mimeType: string;
}
|
||||
|
||||
export interface DatabaseMessageExtraImageFile {
|
||||
type: AttachmentType.IMAGE;
|
||||
name: string;
|
||||
@@ -82,6 +90,7 @@ export type DatabaseMessageExtra =
|
||||
| DatabaseMessageExtraImageFile
|
||||
| DatabaseMessageExtraTextFile
|
||||
| DatabaseMessageExtraAudioFile
|
||||
| DatabaseMessageExtraVideoFile
|
||||
| DatabaseMessageExtraPdfFile
|
||||
| DatabaseMessageExtraMcpPrompt
|
||||
| DatabaseMessageExtraMcpResource
|
||||
|
||||
@@ -55,6 +55,7 @@ export type {
|
||||
McpServerOverride,
|
||||
DatabaseConversation,
|
||||
DatabaseMessageExtraAudioFile,
|
||||
DatabaseMessageExtraVideoFile,
|
||||
DatabaseMessageExtraImageFile,
|
||||
DatabaseMessageExtraLegacyContext,
|
||||
DatabaseMessageExtraMcpPrompt,
|
||||
|
||||
Vendored
+2
@@ -3,6 +3,7 @@ import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
|
||||
/** Non-text input modalities of a model; each flag is true when the modality is supported. */
export interface ModelModalities {
	vision: boolean;
	audio: boolean;
	video: boolean;
}
|
||||
|
||||
export interface ModelOption {
|
||||
@@ -35,4 +36,5 @@ export interface ParsedModelId {
|
||||
/** Model capability flags used when validating files against the selected model. */
export interface ModalityCapabilities {
	/** Image files are accepted. */
	hasVision: boolean;
	/** Audio files are accepted. */
	hasAudio: boolean;
	/** Video files are accepted. */
	hasVideo: boolean;
}
|
||||
|
||||
@@ -103,3 +103,24 @@ export function isAudioFile(
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Determines if an attachment or uploaded file is a video file.
 *
 * `uploadedFile` takes precedence: when it is provided, `attachment` is not
 * consulted at all.
 *
 * @param attachment - Optional database attachment; counts as video when its
 *   `type` is `AttachmentType.VIDEO`
 * @param uploadedFile - Optional uploaded file; counts as video when
 *   `getUploadedFileCategory` reports `FileTypeCategory.VIDEO`
 * @returns true if the file is a video file; false when neither argument is given
 */
export function isVideoFile(
	attachment?: DatabaseMessageExtra,
	uploadedFile?: ChatUploadedFile
): boolean {
	if (uploadedFile) {
		return getUploadedFileCategory(uploadedFile) === FileTypeCategory.VIDEO;
	}

	if (attachment) {
		return attachment.type === AttachmentType.VIDEO;
	}

	return false;
}
|
||||
|
||||
@@ -89,6 +89,21 @@ export async function parseFilesToMessageExtras(
|
||||
} catch (error) {
|
||||
console.error(`Failed to process audio file ${file.name}:`, error);
|
||||
}
|
||||
} else if (getFileTypeCategory(file.type) === FileTypeCategory.VIDEO) {
|
||||
// Process video files (MP4, etc)
|
||||
try {
|
||||
const base64Data = await readFileAsBase64(file.file);
|
||||
|
||||
extras.push({
|
||||
type: AttachmentType.VIDEO,
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
base64Data: base64Data,
|
||||
mimeType: file.type
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(`Failed to process video file ${file.name}:`, error);
|
||||
}
|
||||
} else if (getFileTypeCategory(file.type) === FileTypeCategory.PDF) {
|
||||
try {
|
||||
// Always get base64 data for preview functionality
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import {
|
||||
AUDIO_FILE_TYPES,
|
||||
VIDEO_FILE_TYPES,
|
||||
IMAGE_FILE_TYPES,
|
||||
PDF_FILE_TYPES,
|
||||
TEXT_FILE_TYPES
|
||||
@@ -12,6 +13,7 @@ import {
|
||||
FileTypeCategory,
|
||||
MimeTypeApplication,
|
||||
MimeTypeAudio,
|
||||
MimeTypeVideo,
|
||||
MimeTypeImage,
|
||||
MimeTypeText
|
||||
} from '$lib/enums';
|
||||
@@ -35,6 +37,11 @@ export function getFileTypeCategory(mimeType: string): FileTypeCategory | null {
|
||||
case MimeTypeAudio.WEBM_OPUS:
|
||||
return FileTypeCategory.AUDIO;
|
||||
|
||||
// Video
|
||||
case MimeTypeVideo.MP4:
|
||||
case MimeTypeVideo.OGG:
|
||||
return FileTypeCategory.VIDEO;
|
||||
|
||||
// PDF
|
||||
case MimeTypeApplication.PDF:
|
||||
return FileTypeCategory.PDF;
|
||||
@@ -179,6 +186,12 @@ export function getFileTypeByExtension(filename: string): string | null {
|
||||
}
|
||||
}
|
||||
|
||||
for (const [key, type] of Object.entries(VIDEO_FILE_TYPES)) {
|
||||
if ((type.extensions as readonly string[]).includes(extension)) {
|
||||
return `${FileTypeCategory.VIDEO}:${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
for (const [key, type] of Object.entries(PDF_FILE_TYPES)) {
|
||||
if ((type.extensions as readonly string[]).includes(extension)) {
|
||||
return `${FileTypeCategory.PDF}:${key}`;
|
||||
|
||||
@@ -14,7 +14,7 @@ export { validateApiKey } from './api-key-validation';
|
||||
|
||||
// Attachment utilities
|
||||
export { getAttachmentDisplayItems, isMcpPrompt, isMcpResource } from './attachment-display';
|
||||
export { isTextFile, isImageFile, isPdfFile, isAudioFile } from './attachment-type';
|
||||
export { isTextFile, isImageFile, isPdfFile, isAudioFile, isVideoFile } from './attachment-type';
|
||||
|
||||
// Textarea utilities
|
||||
export { default as autoResizeTextarea } from './autoresize-textarea';
|
||||
|
||||
@@ -45,6 +45,10 @@ export function isFileTypeSupportedByModel(
|
||||
// Audio files require audio support
|
||||
return capabilities.hasAudio;
|
||||
|
||||
case FileTypeCategory.VIDEO:
|
||||
// Video files require video support
|
||||
return capabilities.hasVideo;
|
||||
|
||||
default:
|
||||
// Unknown categories - be conservative and allow
|
||||
return true;
|
||||
@@ -69,7 +73,7 @@ export function filterFilesByModalities(
|
||||
const unsupportedFiles: File[] = [];
|
||||
const modalityReasons: Record<string, string> = {};
|
||||
|
||||
const { hasVision, hasAudio } = capabilities;
|
||||
const { hasVision, hasAudio, hasVideo } = capabilities;
|
||||
|
||||
for (const file of files) {
|
||||
const category = getFileTypeCategory(file.type);
|
||||
@@ -91,6 +95,13 @@ export function filterFilesByModalities(
|
||||
}
|
||||
break;
|
||||
|
||||
case FileTypeCategory.VIDEO:
|
||||
if (!hasVideo) {
|
||||
isSupported = false;
|
||||
reason = 'Video files require a video-capable model';
|
||||
}
|
||||
break;
|
||||
|
||||
case FileTypeCategory.TEXT:
|
||||
case FileTypeCategory.PDF:
|
||||
// Always supported
|
||||
@@ -127,7 +138,7 @@ export function generateModalityErrorMessage(
|
||||
): string {
|
||||
if (unsupportedFiles.length === 0) return '';
|
||||
|
||||
const { hasVision, hasAudio } = capabilities;
|
||||
const { hasVision, hasAudio, hasVideo } = capabilities;
|
||||
|
||||
let message = '';
|
||||
|
||||
@@ -144,6 +155,7 @@ export function generateModalityErrorMessage(
|
||||
const supportedTypes: string[] = ['text files', 'PDFs'];
|
||||
if (hasVision) supportedTypes.push('images');
|
||||
if (hasAudio) supportedTypes.push('audio files');
|
||||
if (hasVideo) supportedTypes.push('video files');
|
||||
|
||||
message += ` This model supports: ${supportedTypes.join(', ')}.`;
|
||||
|
||||
|
||||
@@ -117,6 +117,10 @@ export async function processFilesToChatUploaded(
|
||||
// Generate preview URL for audio files
|
||||
const preview = await readFileAsDataURL(file);
|
||||
results.push({ ...base, preview });
|
||||
} else if (getFileTypeCategory(file.type) === FileTypeCategory.VIDEO) {
|
||||
// Generate preview URL for video files
|
||||
const preview = await readFileAsDataURL(file);
|
||||
results.push({ ...base, preview });
|
||||
} else {
|
||||
// Fallback: treat unknown files as text
|
||||
try {
|
||||
|
||||
@@ -15,7 +15,8 @@ export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
|
||||
model_path: props.model_path || 'test-model',
|
||||
modalities: {
|
||||
vision: props.modalities?.vision ?? false,
|
||||
audio: props.modalities?.audio ?? false
|
||||
audio: props.modalities?.audio ?? false,
|
||||
video: props.modalities?.video ?? false
|
||||
},
|
||||
...props
|
||||
} as ApiLlamaCppServerProps;
|
||||
@@ -26,11 +27,14 @@ export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
|
||||
// Also mock modelsStore methods for modality checking
|
||||
const vision = props.modalities?.vision ?? false;
|
||||
const audio = props.modalities?.audio ?? false;
|
||||
const video = props.modalities?.video ?? false;
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(modelsStore as any).modelSupportsVision = () => vision;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(modelsStore as any).modelSupportsAudio = () => audio;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(modelsStore as any).modelSupportsVideo = () => video;
|
||||
|
||||
// Mock models list with a test model so activeModelId can be resolved
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
@@ -55,7 +59,8 @@ export function resetServerStore(): void {
|
||||
model_path: '',
|
||||
modalities: {
|
||||
vision: false,
|
||||
audio: false
|
||||
audio: false,
|
||||
video: false
|
||||
}
|
||||
} as ApiLlamaCppServerProps;
|
||||
(serverStore as unknown as { error: string }).error = '';
|
||||
@@ -76,6 +81,6 @@ export const mockConfigs = {
|
||||
modalities: { vision: true, audio: true }
|
||||
},
|
||||
noModalities: {
|
||||
modalities: { vision: false, audio: false }
|
||||
modalities: { vision: false, audio: false, video: false }
|
||||
}
|
||||
} as const;
|
||||
|
||||
Reference in New Issue
Block a user