ffaafde16f
* ggml-virtgpu-backend: validate the consistency of the received objects This patch adds consistency checks in the ggml-virtgpu-backend (running on the host side) to ensure that the data received from the guest is consistent (valid pointers, valid sizes and offsets). * ggml-virtgpu-backend: add fallback/skips for optional ggml backend methods ``` 1. bck->iface.synchronize(bck) 2. buft->iface.get_alloc_size(buft, op) 3. buft->iface.get_max_size(buft) ``` these three methods are optional in the GGML interface. `get_max_size` was already properly defaulted, but `backend sychronize` and `butf get_max_size` would have segfaulted the backend if not implemented. * ggml-virtgpu-backend: fix log format missing argument * ggml-virtgpu-backend: improve the abort message * ggml-virtgpu-backend: more safety checks * ggml-virtgpu-backend: new error code * ggml-virtgpu-backend: initialize all the error codes * ggml-virtgpu: add a missing comment generated by the code generator * ggml-virtgpu: add the '[virtgpu]' prefix to the device/buffer names * ggml-virtgpu: apir_device_buffer_from_ptr: improve the error message * ggml-virtgpu: shared: make it match the latest api_remoting.h of Virglrenderer APIR (still unmerged) * ggml-virtgpu: update the code generator to have dispatch_command_name in a host/guest shared file * ggml-virtgpu: REMOTE_CALL: fail if the backend returns an error * docs/backend/VirtGPU.md: indicate that the RAM+VRAM size is limed to 64 GB with libkrun * ggml-virtgpu: turn off clang-format header ordering for some of the files Compilation breaks when ordered alphabetically. * ggml-virtgpu: clang-format * ggml-virtgpu/backend/shared/api_remoting: better comments for the APIR return codes
72 lines
2.5 KiB
C++
72 lines
2.5 KiB
C++
#pragma once
|
|
|
|
#include "ggml-backend-impl.h"
|
|
#include "ggml-backend.h"
|
|
#include "ggml-impl.h"
|
|
#include "virtgpu.h"
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#define GGML_VIRTGPU_NAME "ggml-virtgpu"
|
|
#define GGML_VIRTGPU "ggml-virtgpu: "
|
|
|
|
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes
|
|
|
|
#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
|
|
#define USE_METAL_GUEST_SUPPORTS_OP 0
|
|
|
|
#define DEV_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->context)->gpu
|
|
|
|
#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context)
|
|
|
|
#define BUFFER_TO_APIR_CONTEXT(name) &((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context
|
|
|
|
#define BUFFER_TO_HOST_HANDLE(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle
|
|
|
|
#define GET_DEVICE_CONTEXT() (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context
|
|
|
|
#define BUFT_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->device->context)->gpu
|
|
|
|
struct ggml_backend_remoting_device_context {
|
|
size_t device;
|
|
std::string name;
|
|
std::string description;
|
|
|
|
std::vector<std::tuple<void *, size_t, virtgpu_shmem *>> shared_memory;
|
|
|
|
virtgpu * gpu;
|
|
};
|
|
|
|
struct ggml_backend_remoting_buffer_context {
|
|
apir_buffer_context_t apir_context;
|
|
|
|
virtgpu * gpu;
|
|
|
|
void * base;
|
|
|
|
bool is_from_ptr;
|
|
};
|
|
|
|
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
|
|
extern const ggml_backend_device_i ggml_backend_remoting_device_interface;
|
|
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface;
|
|
extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
|
|
extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface;
|
|
|
|
ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
|
|
ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
|
|
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
|
|
|
|
static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
|
|
// in the backend, the buffer handle is the buffer pointer
|
|
return (apir_buffer_type_host_handle_t) buft->context;
|
|
}
|
|
|
|
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
|
|
if (!buffer->context) {
|
|
GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__);
|
|
}
|
|
return BUFFER_TO_HOST_HANDLE(buffer);
|
|
}
|