Compare commits

..

3 Commits

Author SHA1 Message Date
Georgi Gerganov cb79f8a2d8 llama : add SKIP_KQ_KQV option 2023-10-22 09:58:29 +03:00
Georgi Gerganov ed9fde7a1e ggml : skip nops 2023-10-22 09:55:37 +03:00
Georgi Gerganov 2471d56a2e llama : profiling the attention compute 2023-10-22 09:22:54 +03:00
4 changed files with 31 additions and 4 deletions
-1
View File
@@ -632,7 +632,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
process_escapes(params.prompt);
process_escapes(params.input_prefix);
process_escapes(params.input_suffix);
process_escapes(sparams.cfg_negative_prompt);
for (auto & antiprompt : params.antiprompt) {
process_escapes(antiprompt);
}
-3
View File
@@ -761,9 +761,6 @@ int main(int argc, char ** argv) {
n_consumed = embd_inp.size();
embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
}
if (params.escape) {
process_escapes(buffer);
}
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
+4
View File
@@ -16602,6 +16602,10 @@ static void ggml_compute_forward_cross_entropy_loss_back(
static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
GGML_ASSERT(params);
if (tensor->op == GGML_OP_NONE) {
return;
}
#ifdef GGML_USE_CUBLAS
bool skip_cpu = ggml_cuda_compute_forward(params, tensor);
if (skip_cpu) {
+27
View File
@@ -5815,6 +5815,33 @@ static struct ggml_cgraph * llama_build_graph(
GGML_ASSERT(false);
}
#if 1
for (int i = 0; i < result->n_nodes; ++i) {
struct ggml_tensor * node = result->nodes[i];
if (getenv("SKIP_KQ_ALL")) {
if (
strcmp(node->name, "KQ") == 0 ||
strcmp(node->name, "KQ_scaled") == 0 ||
strcmp(node->name, "KQ_masked") == 0 ||
strcmp(node->name, "KQ_soft_max") == 0 ||
strcmp(node->name, "KQV") == 0 ||
false) {
//printf("skipping %s\n", dst->name);
node->op = GGML_OP_NONE;
}
}
if (getenv("SKIP_KQ_KQV")) {
if (
strcmp(node->name, "KQ") == 0 ||
strcmp(node->name, "KQV") == 0 ||
false) {
//printf("skipping %s\n", dst->name);
node->op = GGML_OP_NONE;
}
}
}
#endif
return result;
}