webui: support video files as input (#22830)

This commit is contained in:
Judd
2026-05-17 08:13:44 +08:00
committed by GitHub
parent b64739ea39
commit 4f13cb7424
36 changed files with 310 additions and 28 deletions
+2
View File
@@ -39,6 +39,7 @@ import type {
DatabaseMessage,
DatabaseMessageExtra,
DatabaseMessageExtraAudioFile,
DatabaseMessageExtraVideoFile,
DatabaseMessageExtraImageFile,
DatabaseMessageExtraTextFile,
DatabaseMessageExtraPdfFile,
@@ -102,6 +103,7 @@ declare global {
DatabaseMessage,
DatabaseMessageExtra,
DatabaseMessageExtraAudioFile,
DatabaseMessageExtraVideoFile,
DatabaseMessageExtraImageFile,
DatabaseMessageExtraTextFile,
DatabaseMessageExtraPdfFile,
@@ -1,5 +1,5 @@
<script lang="ts">
import { Eye, Mic } from '@lucide/svelte';
import { Eye, Mic, Video } from '@lucide/svelte';
import { ModelModality } from '$lib/enums';
interface Props {
@@ -11,7 +11,7 @@
</script>
{#each modalities as modality (modality)}
{#if modality === ModelModality.VISION || modality === ModelModality.AUDIO}
{#if modality === ModelModality.VISION || modality === ModelModality.AUDIO || modality === ModelModality.VIDEO}
<span
class={[
'inline-flex items-center gap-1 rounded-md bg-muted px-2 py-1 text-xs font-medium',
@@ -21,7 +21,11 @@
{#if modality === ModelModality.VISION}
<Eye class="h-3 w-3" />
Vision
Vision (Image)
{:else if modality === ModelModality.VIDEO}
<Video class="h-3 w-3" />
Vision (Video)
{:else}
<Mic class="h-3 w-3" />
@@ -1,10 +1,12 @@
<script lang="ts">
import { X } from '@lucide/svelte';
import { X, Music, Video } from '@lucide/svelte';
import {
formatFileSize,
getFileTypeLabel,
getPreviewText,
isPdfFile,
isAudioFile,
isVideoFile,
isTextFile
} from '$lib/utils';
import { ActionIcon } from '$lib/components/app';
@@ -38,6 +40,8 @@
}: Props = $props();
let isPdf = $derived(isPdfFile(attachment, uploadedFile));
let isAudio = $derived(isAudioFile(attachment, uploadedFile));
let isVideo = $derived(isVideoFile(attachment, uploadedFile));
let isPdfWithContent = $derived(isPdf && !!textContent);
let isText = $derived(isTextFile(attachment, uploadedFile));
@@ -102,7 +106,13 @@
<div
class="flex h-8 w-8 items-center justify-center rounded bg-primary/10 text-xs font-medium text-primary"
>
{#if isAudio}
<Music class="h-4 w-4 text-white/70" />
{:else if isVideo}
<Video class="h-4 w-4 text-white/70" />
{:else}
{fileTypeLabel}
{/if}
</div>
{/snippet}
@@ -12,6 +12,7 @@
getAttachmentDisplayItems,
getLanguageFromFilename,
isAudioFile,
isVideoFile,
isImageFile,
isMcpPrompt,
isMcpResource,
@@ -29,6 +30,7 @@
textContent?: string;
isImage: boolean;
isAudio: boolean;
isVideo: boolean;
}
interface Props {
@@ -54,7 +56,8 @@
(item): PreviewItem => ({
...item,
isImage: isImageFile(item.attachment, item.uploadedFile),
isAudio: isAudioFile(item.attachment, item.uploadedFile)
isAudio: isAudioFile(item.attachment, item.uploadedFile),
isVideo: isVideoFile(item.attachment, item.uploadedFile)
})
)
);
@@ -102,6 +105,9 @@
let isAudio = $derived(
currentItem ? isAudioFile(currentItem.attachment, currentItem.uploadedFile) : false
);
let isVideo = $derived(
currentItem ? isVideoFile(currentItem.attachment, currentItem.uploadedFile) : false
);
let isImage = $derived(
currentItem ? isImageFile(currentItem.attachment, currentItem.uploadedFile) : false
);
@@ -148,6 +154,20 @@
: null
);
// Source URL for the video preview. Uses the uploaded file's `preview` when it
// is set; otherwise builds a data URL from the stored attachment's `mimeType`
// and `base64Data`. Evaluates to `null` when the current item is not a video
// or when neither source is available.
let videoSrc = $derived(
isVideo && currentItem
? (currentItem.uploadedFile?.preview ??
(currentItem.attachment &&
'mimeType' in currentItem.attachment &&
'base64Data' in currentItem.attachment
? createBase64DataUrl(
currentItem.attachment.mimeType,
currentItem.attachment.base64Data
)
: null))
: null
);
export function prev() {
currentIndex = currentIndex > 0 ? currentIndex - 1 : allItems.length - 1;
}
@@ -173,11 +193,13 @@
{currentItem}
{isImage}
{isAudio}
{isVideo}
{isPdf}
{isText}
{displayPreview}
{displayTextContent}
{audioSrc}
{videoSrc}
{language}
{hasVisionModality}
{activeModelId}
@@ -1,9 +1,10 @@
<script lang="ts">
import type { ChatAttachmentDisplayItem } from '$lib/types';
import { Image, Music, FileText, FileIcon } from '@lucide/svelte';
import { Image, Music, Video, FileText, FileIcon } from '@lucide/svelte';
import ChatAttachmentsPreviewCurrentItemPdf from './ChatAttachmentsPreviewCurrentItemPdf.svelte';
import ChatAttachmentsPreviewCurrentItemImage from './ChatAttachmentsPreviewCurrentItemImage.svelte';
import ChatAttachmentsPreviewCurrentItemAudio from './ChatAttachmentsPreviewCurrentItemAudio.svelte';
import ChatAttachmentsPreviewCurrentItemVideo from './ChatAttachmentsPreviewCurrentItemVideo.svelte';
import ChatAttachmentsPreviewCurrentItemText from './ChatAttachmentsPreviewCurrentItemText.svelte';
import ChatAttachmentsPreviewCurrentItemUnavailable from './ChatAttachmentsPreviewCurrentItemUnavailable.svelte';
@@ -11,11 +12,13 @@
currentItem: ChatAttachmentDisplayItem | null;
isImage: boolean;
isAudio: boolean;
isVideo: boolean;
isPdf: boolean;
isText: boolean;
displayPreview: string | undefined;
displayTextContent: string | undefined;
audioSrc: string | null;
videoSrc: string | null;
language: string;
hasVisionModality: boolean;
activeModelId?: string;
@@ -25,21 +28,25 @@
currentItem,
isImage,
isAudio,
isVideo,
isPdf,
isText,
displayPreview,
displayTextContent,
audioSrc,
videoSrc,
language,
hasVisionModality,
activeModelId
}: Props = $props();
let IconComponent = $derived(
isImage ? Image : isText || isPdf ? FileText : isAudio ? Music : FileIcon
isImage ? Image : isText || isPdf ? FileText : isAudio ? Music : isVideo ? Video : FileIcon
);
let isUnavailable = $derived(!isPdf && !isImage && !(isText && displayTextContent) && !isAudio);
let isUnavailable = $derived(
!isPdf && !isImage && !(isText && displayTextContent) && !isAudio && !isVideo
);
</script>
{#if currentItem}
@@ -58,6 +65,8 @@
<ChatAttachmentsPreviewCurrentItemText {displayTextContent} {language} />
{:else if isAudio}
<ChatAttachmentsPreviewCurrentItemAudio {currentItem} {audioSrc} />
{:else if isVideo}
<ChatAttachmentsPreviewCurrentItemVideo {currentItem} {videoSrc} />
{:else if isUnavailable}
<ChatAttachmentsPreviewCurrentItemUnavailable {IconComponent} />
{/if}
@@ -0,0 +1,26 @@
<script lang="ts">
	import { Video } from '@lucide/svelte';

	/**
	 * Preview pane for a single video attachment.
	 *
	 * - `currentItem`: the attachment being previewed; only its optional `name`
	 *   is read, and it is shown as the label under the player.
	 * - `videoSrc`: URL handed to the `<video>` element, or `null` when no
	 *   playable source exists, in which case a placeholder message is rendered.
	 */
	interface Props {
		currentItem: { name?: string } | null;
		videoSrc: string | null;
	}

	let { currentItem, videoSrc }: Props = $props();
</script>

<div class="flex flex-1 items-center justify-center p-8">
	<div class="w-full max-w-md text-center">
		<Video class="mx-auto mb-4 h-16 w-16 text-white/50" />

		{#if videoSrc}
			<!-- User-supplied videos come without a captions track, so the a11y caption requirement cannot be satisfied here. -->
			<!-- svelte-ignore a11y_media_has_caption -->
			<video controls class="mb-4 w-full" src={videoSrc}>
				Your browser does not support the video element.
			</video>
		{:else}
			<p class="mb-4 text-white/70">Video preview not available</p>
		{/if}

		<!-- Fall back to a generic label when the attachment has no name. -->
		<p class="text-sm text-white/50">{currentItem?.name || 'Video'}</p>
	</div>
</div>
@@ -1,5 +1,5 @@
<script lang="ts">
import { Music, FileText } from '@lucide/svelte';
import { Music, Video, FileText } from '@lucide/svelte';
import { HorizontalScrollCarousel } from '$lib/components/app/misc';
interface PreviewItem {
@@ -7,6 +7,7 @@
name: string;
isImage: boolean;
isAudio: boolean;
isVideo: boolean;
preview?: string;
}
@@ -49,6 +50,8 @@
>
{#if item.isAudio}
<Music class="h-4 w-4 text-white/70" />
{:else if item.isVideo}
<Video class="h-4 w-4 text-white/70" />
{:else}
<FileText class="h-4 w-4 text-white/70" />
{/if}
@@ -23,6 +23,7 @@
class?: string;
disabled?: boolean;
hasAudioModality?: boolean;
hasVideoModality?: boolean;
hasVisionModality?: boolean;
hasMcpPromptsSupport?: boolean;
hasMcpResourcesSupport?: boolean;
@@ -37,6 +38,7 @@
class: className = '',
disabled = false,
hasAudioModality = false,
hasVideoModality = false,
hasVisionModality = false,
hasMcpPromptsSupport = false,
hasMcpResourcesSupport = false,
@@ -58,6 +60,7 @@
() => ({
hasVisionModality,
hasAudioModality,
hasVideoModality,
hasMcpPromptsSupport,
hasMcpResourcesSupport
}),
@@ -19,6 +19,7 @@
class?: string;
disabled?: boolean;
hasAudioModality?: boolean;
hasVideoModality?: boolean;
hasVisionModality?: boolean;
hasMcpPromptsSupport?: boolean;
hasMcpResourcesSupport?: boolean;
@@ -34,6 +35,7 @@
disabled = false,
hasAudioModality = false,
hasVisionModality = false,
hasVideoModality = false,
hasMcpPromptsSupport = false,
hasMcpResourcesSupport = false,
onFileUpload,
@@ -49,6 +51,7 @@
() => ({
hasVisionModality,
hasAudioModality,
hasVideoModality,
hasMcpPromptsSupport,
hasMcpResourcesSupport
}),
@@ -7,6 +7,7 @@
interface Props {
disabled?: boolean;
hasAudioModality?: boolean;
hasVideoModality?: boolean;
hasMcpPromptsSupport?: boolean;
hasMcpResourcesSupport?: boolean;
hasVisionModality?: boolean;
@@ -20,6 +21,7 @@
let {
disabled = false,
hasAudioModality = false,
hasVideoModality = false,
hasMcpPromptsSupport = false,
hasMcpResourcesSupport = false,
hasVisionModality = false,
@@ -37,6 +39,7 @@
<ChatFormActionAddSheet
{disabled}
{hasAudioModality}
{hasVideoModality}
{hasVisionModality}
{hasMcpPromptsSupport}
{hasMcpResourcesSupport}
@@ -52,6 +55,7 @@
<ChatFormActionAddDropdown
{disabled}
{hasAudioModality}
{hasVideoModality}
{hasVisionModality}
{hasMcpPromptsSupport}
{hasMcpResourcesSupport}
@@ -11,6 +11,7 @@
disabled?: boolean;
forceForegroundText?: boolean;
hasAudioModality?: boolean;
hasVideoModality?: boolean;
hasVisionModality?: boolean;
hasModelSelected?: boolean;
isSelectedModelInCache?: boolean;
@@ -23,6 +24,7 @@
disabled = false,
forceForegroundText = false,
hasAudioModality = $bindable(false),
hasVideoModality = $bindable(false),
hasVisionModality = $bindable(false),
hasModelSelected = $bindable(false),
isSelectedModelInCache = $bindable(true),
@@ -95,6 +97,10 @@
hasAudioModality = activeModelId ? modelsStore.modelSupportsAudio(activeModelId) : false;
});
// Keep the bindable `hasVideoModality` flag in sync with the active model.
$effect(() => {
	// Reading `modelPropsVersion` registers it as a dependency of this effect, the
	// same way the neighbouring modality effect does, so the flag is recomputed
	// when that counter changes (presumably bumped once model props are fetched)
	// and not only when `activeModelId` changes.
	void modelPropsVersion;

	hasVideoModality = activeModelId ? modelsStore.modelSupportsVideo(activeModelId) : false;
});
$effect(() => {
void modelPropsVersion;
@@ -66,6 +66,7 @@
});
let hasAudioModality = $state(false);
let hasVideoModality = $state(false);
let hasVisionModality = $state(false);
let hasModelSelected = $state(false);
let isSelectedModelInCache = $state(true);
@@ -94,6 +95,7 @@
<ChatFormActionsAdd
{disabled}
{hasAudioModality}
{hasVideoModality}
{hasVisionModality}
{hasMcpPromptsSupport}
{hasMcpResourcesSupport}
@@ -111,6 +113,7 @@
{disabled}
bind:this={selectorModelRef}
bind:hasAudioModality
bind:hasVideoModality
bind:hasVisionModality
bind:hasModelSelected
bind:isSelectedModelInCache
@@ -144,6 +144,16 @@
return false;
});
// Whether the active model accepts video input; always false without an active model.
let hasVideoModality = $derived.by(() => {
	if (!activeModelId) return false;

	// Touch the props version so this value is re-derived when it changes.
	void modelPropsVersion;

	return modelsStore.modelSupportsVideo(activeModelId);
});
let hasVisionModality = $derived.by(() => {
if (activeModelId) {
void modelPropsVersion;
@@ -284,7 +294,11 @@
}
// Use model-specific capabilities for file validation
const capabilities = { hasVision: hasVisionModality, hasAudio: hasAudioModality };
const capabilities = {
hasVision: hasVisionModality,
hasAudio: hasAudioModality,
hasVideo: hasVideoModality
};
const { supportedFiles, unsupportedFiles, modalityReasons } = filterFilesByModalities(
generallySupported,
capabilities
@@ -297,6 +311,7 @@
if (hasVisionModality) supportedTypes.push('images');
if (hasAudioModality) supportedTypes.push('audio files');
if (hasVideoModality) supportedTypes.push('video files');
fileErrorData = {
generallyUnsupported,
@@ -52,6 +52,15 @@ export const ATTACHMENT_FILE_ITEMS: AttachmentMenuItem[] = [
disabledTooltip: 'Audio files processing requires an audio model',
action: AttachmentAction.FILE_UPLOAD
},
{
id: AttachmentMenuItemId.VIDEO,
label: 'Video Files',
icon: FILE_TYPE_ICONS.video,
class: 'video-button',
enabledWhen: AttachmentItemEnabledWhen.HAS_VIDEO_MODALITY,
disabledTooltip: 'Video files processing requires a video model',
action: AttachmentAction.FILE_UPLOAD
},
{
id: AttachmentMenuItemId.TEXT,
label: 'Text Files',
+7 -3
View File
@@ -8,13 +8,15 @@ import {
FileText as FileTextIcon,
Image as ImageIcon,
Eye as VisionIcon,
Mic as AudioIcon
Mic as AudioIcon,
Video as VideoIcon
} from '@lucide/svelte';
import { FileTypeCategory, ModelModality } from '$lib/enums';
/**
 * Icon component for each file type category, keyed by the
 * `FileTypeCategory` enum value.
 */
export const FILE_TYPE_ICONS = {
[FileTypeCategory.IMAGE]: ImageIcon,
[FileTypeCategory.AUDIO]: AudioIcon,
[FileTypeCategory.VIDEO]: VideoIcon,
[FileTypeCategory.TEXT]: FileTextIcon,
[FileTypeCategory.PDF]: FileIcon
} as const;
@@ -23,10 +25,12 @@ export const DEFAULT_FILE_ICON = FileIcon;
export const MODALITY_ICONS = {
[ModelModality.VISION]: VisionIcon,
[ModelModality.AUDIO]: AudioIcon
[ModelModality.AUDIO]: AudioIcon,
[ModelModality.VIDEO]: VideoIcon
} as const;
export const MODALITY_LABELS = {
[ModelModality.VISION]: 'Vision',
[ModelModality.AUDIO]: 'Audio'
[ModelModality.AUDIO]: 'Audio',
[ModelModality.VIDEO]: 'Video'
} as const;
@@ -13,10 +13,12 @@ import {
FileTypePdf,
FileTypeText,
MimeTypeAudio,
MimeTypeVideo,
MimeTypeImage,
MimeTypeApplication,
MimeTypeText
} from '$lib/enums';
import { FileExtensionVideo, FileTypeVideo } from '$lib/enums/files';
// File type configuration using enums
export const AUDIO_FILE_TYPES = {
@@ -30,6 +32,17 @@ export const AUDIO_FILE_TYPES = {
}
} as const;
/**
 * Video file types known to the UI. Each entry, keyed by `FileTypeVideo`,
 * lists the file extensions and MIME types that identify that type.
 */
export const VIDEO_FILE_TYPES = {
[FileTypeVideo.MP4]: {
extensions: [FileExtensionVideo.MP4],
mimeTypes: [MimeTypeVideo.MP4]
},
[FileTypeVideo.OGG]: {
extensions: [FileExtensionVideo.OGG],
mimeTypes: [MimeTypeVideo.OGG]
}
} as const;
export const IMAGE_FILE_TYPES = {
[FileTypeImage.JPEG]: {
extensions: [FileExtensionImage.JPG, FileExtensionImage.JPEG],
+4 -1
View File
@@ -4,6 +4,7 @@
export enum AttachmentType {
AUDIO = 'AUDIO',
IMAGE = 'IMAGE',
VIDEO = 'VIDEO',
MCP_PROMPT = 'MCP_PROMPT',
MCP_RESOURCE = 'MCP_RESOURCE',
PDF = 'PDF',
@@ -18,6 +19,7 @@ export enum AttachmentType {
export enum AttachmentMenuItemId {
IMAGES = 'images',
AUDIO = 'audio',
VIDEO = 'video',
TEXT = 'text',
PDF = 'pdf',
SYSTEM_MESSAGE = 'system-message',
@@ -31,7 +33,8 @@ export enum AttachmentMenuItemId {
export enum AttachmentItemEnabledWhen {
ALWAYS = 'always',
HAS_VISION_MODALITY = 'hasVisionModality',
HAS_AUDIO_MODALITY = 'hasAudioModality'
HAS_AUDIO_MODALITY = 'hasAudioModality',
HAS_VIDEO_MODALITY = 'hasVideoModality'
}
/**
+2 -1
View File
@@ -39,7 +39,8 @@ export enum MessageType {
export enum ContentPartType {
TEXT = 'text',
IMAGE_URL = 'image_url',
INPUT_AUDIO = 'input_audio'
INPUT_AUDIO = 'input_audio',
INPUT_VIDEO = 'input_video'
}
/**
+16
View File
@@ -7,6 +7,7 @@
export enum FileTypeCategory {
IMAGE = 'image',
AUDIO = 'audio',
VIDEO = 'video',
PDF = 'pdf',
TEXT = 'text'
}
@@ -33,6 +34,11 @@ export enum FileTypeAudio {
WEBM = 'webm'
}
/** Identifiers of the video file types; used as the keys of the video file type table. */
export enum FileTypeVideo {
MP4 = 'mp4',
OGG = 'ogg'
}
export enum FileTypePdf {
PDF = 'pdf'
}
@@ -92,6 +98,11 @@ export enum FileExtensionAudio {
WAV = '.wav'
}
/**
 * File name extensions (including the leading dot) treated as video.
 *
 * NOTE(review): `.ogg` is also widely used for Ogg audio — confirm it does not
 * collide with an audio extension entry during extension-based lookup.
 */
export enum FileExtensionVideo {
MP4 = '.mp4',
OGG = '.ogg'
}
export enum FileExtensionPdf {
PDF = '.pdf'
}
@@ -176,6 +187,11 @@ export enum MimeTypeAudio {
WEBM_OPUS = 'audio/webm;codecs=opus'
}
/** MIME types classified as video files. */
export enum MimeTypeVideo {
MP4 = 'video/mp4',
OGG = 'video/ogg'
}
export enum MimeTypeImage {
JPEG = 'image/jpeg',
JPG = 'image/jpg',
+1
View File
@@ -34,6 +34,7 @@ export {
UriPattern,
MimeTypeApplication,
MimeTypeAudio,
MimeTypeVideo,
MimeTypeImage,
MimeTypeText,
SpecialFileType
+2 -1
View File
@@ -1,5 +1,6 @@
export enum ModelModality {
TEXT = 'TEXT',
AUDIO = 'AUDIO',
VISION = 'VISION'
VISION = 'VISION',
VIDEO = 'VIDEO'
}
@@ -4,6 +4,7 @@ import { AttachmentAction } from '$lib/enums';
/**
 * Capability flags passed to the attachment menu. The property names match the
 * string values of `AttachmentItemEnabledWhen`, so presumably each menu item's
 * `enabledWhen` is resolved against this object — confirm against the consumer.
 */
export interface AttachmentModalityFlags {
hasVisionModality: boolean;
hasAudioModality: boolean;
hasVideoModality: boolean;
hasMcpPromptsSupport: boolean;
hasMcpResourcesSupport: boolean;
}
+19
View File
@@ -884,6 +884,25 @@ export class ChatService {
});
}
// Append one `input_video` content part for every video attachment on the message.
const videoFiles = message.extra.filter(
(extra: DatabaseMessageExtra): extra is DatabaseMessageExtraVideoFile =>
extra.type === AttachmentType.VIDEO
);
for (const video of videoFiles) {
contentParts.push({
type: ContentPartType.INPUT_VIDEO,
input_video: {
data: video.base64Data,
// The format is taken from a substring match on the MIME type; anything
// that is neither mp4 nor ogg is sent as 'auto'.
format: video.mimeType.includes('mp4')
? 'mp4'
: video.mimeType.includes('ogg')
? 'ogg'
: 'auto'
}
});
}
}
const pdfFiles = message.extra.filter(
(extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
extra.type === AttachmentType.PDF
+16 -4
View File
@@ -148,7 +148,8 @@ class ModelsStore {
if (props?.modalities) {
return {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
audio: props.modalities.audio ?? false,
video: props.modalities.video ?? false
};
}
@@ -169,6 +170,13 @@ class ModelsStore {
return this.getModelModalities(modelId)?.audio ?? false;
}
/**
 * Report whether the given model accepts video input.
 *
 * @param modelId - Identifier of the model to look up
 * @returns the model's `video` modality flag, or `false` when no modalities are available for it
 */
modelSupportsVideo(modelId: string): boolean {
	const modalities = this.getModelModalities(modelId);
	return modalities?.video ?? false;
}
/**
* Get model modalities as an array of ModelModality enum values
*/
@@ -180,6 +188,7 @@ class ModelsStore {
if (modalities.vision) result.push(ModelModality.VISION);
if (modalities.audio) result.push(ModelModality.AUDIO);
if (modalities.video) result.push(ModelModality.VIDEO);
return result;
}
@@ -316,7 +325,8 @@ class ModelsStore {
if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) {
const modalities: ModelModalities = {
vision: serverProps.modalities.vision ?? false,
audio: serverProps.modalities.audio ?? false
audio: serverProps.modalities.audio ?? false,
video: serverProps.modalities.video ?? false
};
this.modelPropsCache.set(this.models[0].model, serverProps);
this.models = this.models.map((model, index) =>
@@ -410,7 +420,8 @@ class ModelsStore {
const modalities: ModelModalities = {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
audio: props.modalities.audio ?? false,
video: props.modalities.video ?? false
};
return { ...model, modalities };
@@ -529,7 +540,8 @@ class ModelsStore {
const modalities: ModelModalities = {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
audio: props.modalities.audio ?? false,
video: props.modalities.video ?? false
};
this.models = this.models.map((model) =>
+5
View File
@@ -22,6 +22,10 @@ export interface ApiChatMessageContentPart {
data: string;
format: 'wav' | 'mp3';
};
input_video?: {
data: string;
format: 'mp4' | 'ogg' | 'auto';
};
}
export interface ApiContextSizeError {
@@ -190,6 +194,7 @@ export interface ApiLlamaCppServerProps {
modalities: {
vision: boolean;
audio: boolean;
video: boolean;
};
chat_template: string;
bos_token: string;
+6 -1
View File
@@ -64,4 +64,9 @@ export interface ParsedClipboardContent {
mcpPromptAttachments: ClipboardMcpPromptAttachment[];
}
export type MimeTypeUnion = MimeTypeAudio | MimeTypeImage | MimeTypeApplication | MimeTypeText;
export type MimeTypeUnion =
| MimeTypeAudio
| MimeTypeVideo
| MimeTypeImage
| MimeTypeApplication
| MimeTypeText;
+9
View File
@@ -23,6 +23,14 @@ export interface DatabaseMessageExtraAudioFile {
mimeType: string;
}
/** A video file attached to a stored message. */
export interface DatabaseMessageExtraVideoFile {
// Discriminant identifying this extra as a video attachment.
type: AttachmentType.VIDEO;
// File name of the video.
name: string;
// File size; optional. Presumably in bytes — confirm against the producer.
size?: number;
// Base64-encoded file contents; combined with `mimeType` when a data URL is needed.
base64Data: string;
// MIME type of the video, e.g. 'video/mp4'.
mimeType: string;
}
export interface DatabaseMessageExtraImageFile {
type: AttachmentType.IMAGE;
name: string;
@@ -82,6 +90,7 @@ export type DatabaseMessageExtra =
| DatabaseMessageExtraImageFile
| DatabaseMessageExtraTextFile
| DatabaseMessageExtraAudioFile
| DatabaseMessageExtraVideoFile
| DatabaseMessageExtraPdfFile
| DatabaseMessageExtraMcpPrompt
| DatabaseMessageExtraMcpResource
+1
View File
@@ -55,6 +55,7 @@ export type {
McpServerOverride,
DatabaseConversation,
DatabaseMessageExtraAudioFile,
DatabaseMessageExtraVideoFile,
DatabaseMessageExtraImageFile,
DatabaseMessageExtraLegacyContext,
DatabaseMessageExtraMcpPrompt,
+2
View File
@@ -3,6 +3,7 @@ import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
/** Non-text input modalities a model supports, one boolean flag per modality. */
export interface ModelModalities {
vision: boolean;
audio: boolean;
video: boolean;
}
export interface ModelOption {
@@ -35,4 +36,5 @@ export interface ParsedModelId {
/**
 * Capability flags used when checking whether a file can be sent to a model:
 * image files need `hasVision`, audio files `hasAudio`, video files `hasVideo`.
 */
export interface ModalityCapabilities {
hasVision: boolean;
hasAudio: boolean;
hasVideo: boolean;
}
+21
View File
@@ -103,3 +103,24 @@ export function isAudioFile(
return false;
}
/**
 * Determines whether an attachment or an uploaded file is a video file.
 * When both are given, the uploaded file decides the result.
 *
 * @param attachment - Optional database attachment
 * @param uploadedFile - Optional uploaded file
 * @returns true if the file is a video file, false otherwise (including when neither argument is given)
 */
export function isVideoFile(
	attachment?: DatabaseMessageExtra,
	uploadedFile?: ChatUploadedFile
): boolean {
	// A pending upload takes precedence over a stored attachment.
	if (uploadedFile) {
		const category = getUploadedFileCategory(uploadedFile);
		return category === FileTypeCategory.VIDEO;
	}

	return attachment ? attachment.type === AttachmentType.VIDEO : false;
}
@@ -89,6 +89,21 @@ export async function parseFilesToMessageExtras(
} catch (error) {
console.error(`Failed to process audio file ${file.name}:`, error);
}
} else if (getFileTypeCategory(file.type) === FileTypeCategory.VIDEO) {
// Process video files (MP4, etc)
try {
const base64Data = await readFileAsBase64(file.file);
extras.push({
type: AttachmentType.VIDEO,
name: file.name,
size: file.size,
base64Data: base64Data,
mimeType: file.type
});
} catch (error) {
console.error(`Failed to process video file ${file.name}:`, error);
}
} else if (getFileTypeCategory(file.type) === FileTypeCategory.PDF) {
try {
// Always get base64 data for preview functionality
+13
View File
@@ -1,5 +1,6 @@
import {
AUDIO_FILE_TYPES,
VIDEO_FILE_TYPES,
IMAGE_FILE_TYPES,
PDF_FILE_TYPES,
TEXT_FILE_TYPES
@@ -12,6 +13,7 @@ import {
FileTypeCategory,
MimeTypeApplication,
MimeTypeAudio,
MimeTypeVideo,
MimeTypeImage,
MimeTypeText
} from '$lib/enums';
@@ -35,6 +37,11 @@ export function getFileTypeCategory(mimeType: string): FileTypeCategory | null {
case MimeTypeAudio.WEBM_OPUS:
return FileTypeCategory.AUDIO;
// Video
case MimeTypeVideo.MP4:
case MimeTypeVideo.OGG:
return FileTypeCategory.VIDEO;
// PDF
case MimeTypeApplication.PDF:
return FileTypeCategory.PDF;
@@ -179,6 +186,12 @@ export function getFileTypeByExtension(filename: string): string | null {
}
}
for (const [key, type] of Object.entries(VIDEO_FILE_TYPES)) {
if ((type.extensions as readonly string[]).includes(extension)) {
return `${FileTypeCategory.VIDEO}:${key}`;
}
}
for (const [key, type] of Object.entries(PDF_FILE_TYPES)) {
if ((type.extensions as readonly string[]).includes(extension)) {
return `${FileTypeCategory.PDF}:${key}`;
+1 -1
View File
@@ -14,7 +14,7 @@ export { validateApiKey } from './api-key-validation';
// Attachment utilities
export { getAttachmentDisplayItems, isMcpPrompt, isMcpResource } from './attachment-display';
export { isTextFile, isImageFile, isPdfFile, isAudioFile } from './attachment-type';
export { isTextFile, isImageFile, isPdfFile, isAudioFile, isVideoFile } from './attachment-type';
// Textarea utilities
export { default as autoResizeTextarea } from './autoresize-textarea';
@@ -45,6 +45,10 @@ export function isFileTypeSupportedByModel(
// Audio files require audio support
return capabilities.hasAudio;
case FileTypeCategory.VIDEO:
// Video files require video support
return capabilities.hasVideo;
default:
// Unknown categories - be conservative and allow
return true;
@@ -69,7 +73,7 @@ export function filterFilesByModalities(
const unsupportedFiles: File[] = [];
const modalityReasons: Record<string, string> = {};
const { hasVision, hasAudio } = capabilities;
const { hasVision, hasAudio, hasVideo } = capabilities;
for (const file of files) {
const category = getFileTypeCategory(file.type);
@@ -91,6 +95,13 @@ export function filterFilesByModalities(
}
break;
case FileTypeCategory.VIDEO:
if (!hasVideo) {
isSupported = false;
reason = 'Video files require a video-capable model';
}
break;
case FileTypeCategory.TEXT:
case FileTypeCategory.PDF:
// Always supported
@@ -127,7 +138,7 @@ export function generateModalityErrorMessage(
): string {
if (unsupportedFiles.length === 0) return '';
const { hasVision, hasAudio } = capabilities;
const { hasVision, hasAudio, hasVideo } = capabilities;
let message = '';
@@ -144,6 +155,7 @@ export function generateModalityErrorMessage(
const supportedTypes: string[] = ['text files', 'PDFs'];
if (hasVision) supportedTypes.push('images');
if (hasAudio) supportedTypes.push('audio files');
if (hasVideo) supportedTypes.push('video files');
message += ` This model supports: ${supportedTypes.join(', ')}.`;
@@ -117,6 +117,10 @@ export async function processFilesToChatUploaded(
// Generate preview URL for audio files
const preview = await readFileAsDataURL(file);
results.push({ ...base, preview });
} else if (getFileTypeCategory(file.type) === FileTypeCategory.VIDEO) {
// Generate preview URL for video files
const preview = await readFileAsDataURL(file);
results.push({ ...base, preview });
} else {
// Fallback: treat unknown files as text
try {
@@ -15,7 +15,8 @@ export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
model_path: props.model_path || 'test-model',
modalities: {
vision: props.modalities?.vision ?? false,
audio: props.modalities?.audio ?? false
audio: props.modalities?.audio ?? false,
video: props.modalities?.video ?? false
},
...props
} as ApiLlamaCppServerProps;
@@ -26,11 +27,14 @@ export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
// Also mock modelsStore methods for modality checking
const vision = props.modalities?.vision ?? false;
const audio = props.modalities?.audio ?? false;
const video = props.modalities?.video ?? false;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(modelsStore as any).modelSupportsVision = () => vision;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(modelsStore as any).modelSupportsAudio = () => audio;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(modelsStore as any).modelSupportsVideo = () => video;
// Mock models list with a test model so activeModelId can be resolved
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -55,7 +59,8 @@ export function resetServerStore(): void {
model_path: '',
modalities: {
vision: false,
audio: false
audio: false,
video: false
}
} as ApiLlamaCppServerProps;
(serverStore as unknown as { error: string }).error = '';
@@ -76,6 +81,6 @@ export const mockConfigs = {
modalities: { vision: true, audio: true }
},
noModalities: {
modalities: { vision: false, audio: false }
modalities: { vision: false, audio: false, video: false }
}
} as const;