Compare commits

..

16 Commits

Author SHA1 Message Date
Georgi Gerganov 15267192c0 llama : refactor tensor offloading as callback 2023-10-29 13:04:36 +02:00
Georgi Gerganov da936188d8 llama : move refact in correct place + optimize graph input 2023-10-29 11:48:58 +02:00
Georgi Gerganov 739b85c985 llama : try to fix build 2023-10-29 11:25:32 +02:00
Georgi Gerganov 25cfbf6776 llama : fix non-CUDA build 2023-10-29 11:12:03 +02:00
Georgi Gerganov b4ad03b3a7 llama : try to optimize offloading code 2023-10-29 10:33:11 +02:00
Georgi Gerganov 79617902ea llama : fix res_norm offloading 2023-10-29 09:20:35 +02:00
Georgi Gerganov e14aa46151 llama : do tensor offload only with CUDA 2023-10-29 08:03:46 +02:00
Georgi Gerganov 0dc05b8433 llama : factor graph input into a function 2023-10-29 07:52:43 +02:00
Georgi Gerganov 4e98897ede llama : support offloading result_norm + comments 2023-10-29 07:36:07 +02:00
Georgi Gerganov 51c4f9ee9f llama : comments 2023-10-28 22:50:08 +03:00
Georgi Gerganov 3af8771389 llama : update offload log messages to print node index 2023-10-28 22:36:44 +03:00
Georgi Gerganov 83d2c43791 llama : offload rest of the models
ggml-ci
2023-10-28 22:30:54 +03:00
Georgi Gerganov 38aca9e1ab llama : factor out tensor offloading outside the build call (wip)
ggml-ci
2023-10-28 21:22:31 +03:00
Georgi Gerganov 5946d98fc8 metal : disable kernel load log 2023-10-28 21:22:01 +03:00
Georgi Gerganov 8b2420d249 llama : factor out ggml-alloc from graph graph build functions
ggml-ci
2023-10-28 19:54:28 +03:00
Erik Scholz ff3bad83e2 flake : update flake.lock for newer transformers version + provide extra dev shell (#3797)
* flake : update flake.lock for newer transformers version + provide extra dev shell with torch and transformers (for most convert-xxx.py scripts)
2023-10-28 16:41:07 +02:00
5 changed files with 1197 additions and 1349 deletions
Generated
+3 -3
View File
@@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1692913444,
"narHash": "sha256-1SvMQm2DwofNxXVtNWWtIcTh7GctEVrS/Xel/mdc6iY=",
"lastModified": 1698134075,
"narHash": "sha256-foCD+nuKzfh49bIoiCBur4+Fx1nozo+4C/6k8BYk4sg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "18324978d632ffc55ef1d928e81630c620f4f447",
"rev": "8efd5d1e283604f75a808a20e6cde0ef313d07d4",
"type": "github"
},
"original": {
+7
View File
@@ -51,6 +51,9 @@
};
llama-python =
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
llama-python-extra =
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
postPatch = ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
@@ -126,5 +129,9 @@
buildInputs = [ llama-python ];
packages = nativeBuildInputs ++ osSpecific;
};
devShells.extra = pkgs.mkShell {
buildInputs = [ llama-python-extra ];
packages = nativeBuildInputs ++ osSpecific;
};
});
}
+7 -4
View File
@@ -238,14 +238,17 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
// load kernels
{
NSError * error = nil;
#define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
/*
GGML_METAL_LOG_INFO("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
(int) ctx->pipeline_##name.threadExecutionWidth); \
*/
#define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
if (error) { \
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
return NULL; \
}
+1 -1
View File
@@ -709,7 +709,7 @@ extern "C" {
// Context tensor enumeration and lookup
GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
GGML_API struct ggml_tensor * ggml_get_tensor (struct ggml_context * ctx, const char * name);
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
+1179 -1341
View File
File diff suppressed because it is too large Load Diff