Compare commits
9 Commits
b1369
...
rev-sampling
| Author | SHA1 | Date | |
|---|---|---|---|
| 5261aee8d8 | |||
| 370359e5ba | |||
| 9e24cc6e2e | |||
| d28e572c02 | |||
| f3040beaab | |||
| 1a8c8795d6 | |||
| b016596d90 | |||
| 6b3ae4da92 | |||
| 57dd55e2c7 |
@@ -44,6 +44,7 @@ models-mnt
|
||||
/infill
|
||||
/libllama.so
|
||||
/llama-bench
|
||||
/llava
|
||||
/main
|
||||
/metal
|
||||
/perplexity
|
||||
|
||||
+3
-3
@@ -422,8 +422,7 @@ endif()
|
||||
if (LLAMA_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
|
||||
-Werror=implicit-function-declaration)
|
||||
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
|
||||
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
||||
set(host_cxx_flags "")
|
||||
|
||||
@@ -455,7 +454,8 @@ if (LLAMA_ALL_WARNINGS)
|
||||
set(c_flags ${c_flags} ${warning_flags})
|
||||
set(cxx_flags ${cxx_flags} ${warning_flags})
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Define the default target now so that it is always the first target
|
||||
BUILD_TARGETS = \
|
||||
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
|
||||
simple batched batched-bench save-load-state server embd-input-test gguf llama-bench baby-llama beam-search \
|
||||
simple batched batched-bench save-load-state server embd-input-test gguf llama-bench llava baby-llama beam-search \
|
||||
speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||
|
||||
# Binaries only useful for tests
|
||||
@@ -627,6 +627,9 @@ convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggm
|
||||
llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
llava: examples/llava/llava.cpp examples/llava/llava-utils.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
|
||||
|
||||
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
|
||||
@@ -279,7 +279,7 @@ In order to build llama.cpp you have three different options.
|
||||
On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
|
||||
To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.
|
||||
|
||||
When built with Metal support, you can explicitly disable GPU inference with the `--gpu-layers|-ngl 0` command-line
|
||||
When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
|
||||
argument.
|
||||
|
||||
### MPI Build
|
||||
|
||||
@@ -496,10 +496,12 @@ test $ret -eq 0 && gg_run ctest_debug
|
||||
test $ret -eq 0 && gg_run ctest_release
|
||||
|
||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||
if [ -z ${GG_BUILD_CUDA} ]; then
|
||||
test $ret -eq 0 && gg_run open_llama_3b_v2
|
||||
else
|
||||
test $ret -eq 0 && gg_run open_llama_7b_v2
|
||||
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
||||
if [ -z ${GG_BUILD_CUDA} ]; then
|
||||
test $ret -eq 0 && gg_run open_llama_3b_v2
|
||||
else
|
||||
test $ret -eq 0 && gg_run open_llama_7b_v2
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
@@ -384,6 +384,18 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||
break;
|
||||
}
|
||||
params.lora_base = argv[i];
|
||||
} else if (arg == "--mmproj") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.mmproj = argv[i];
|
||||
} else if (arg == "--image") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.image = argv[i];
|
||||
} else if (arg == "-i" || arg == "--interactive") {
|
||||
params.interactive = true;
|
||||
} else if (arg == "--embedding") {
|
||||
@@ -703,6 +715,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
printf(" -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
|
||||
printf(" -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
|
||||
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
|
||||
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA. see examples/llava/README.md\n");
|
||||
printf(" --image IMAGE_FILE path to an image file. use with multimodal models\n");
|
||||
if (llama_mlock_supported()) {
|
||||
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
||||
}
|
||||
|
||||
@@ -104,6 +104,10 @@ struct gpt_params {
|
||||
bool numa = false; // attempt optimizations that help on some NUMA systems
|
||||
bool verbose_prompt = false; // print prompt tokens before generation
|
||||
bool infill = false; // use infill mode
|
||||
|
||||
// multimodal models (see examples/llava)
|
||||
std::string mmproj = ""; // path to multimodal projector
|
||||
std::string image = ""; // path to an image file
|
||||
};
|
||||
|
||||
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
||||
|
||||
+11
-50
@@ -1,50 +1,14 @@
|
||||
#include "sampling.h"
|
||||
|
||||
llama_sampling_context::~llama_sampling_context() {
|
||||
for (auto & it : sequence_contexts) {
|
||||
if (it.second.grammar != NULL) {
|
||||
llama_grammar_free(it.second.grammar);
|
||||
it.second.grammar = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
llama_sampling_context llama_sampling_context_init(
|
||||
const struct gpt_params & params,
|
||||
llama_grammar * grammar) {
|
||||
llama_sampling_context result;
|
||||
llama_sampling_context result;
|
||||
|
||||
result.params = params.sampling_params;
|
||||
result.grammar = grammar;
|
||||
return result;
|
||||
}
|
||||
result.params = params.sampling_params;
|
||||
result.grammar = grammar;
|
||||
|
||||
// Note: Creates the context if it doesn't exist, so this always return something.
|
||||
llama_sampler_sequence_context & llama_sampling_get_sequence_context(
|
||||
llama_sampling_context & ctx_sampling,
|
||||
const llama_seq_id seq) {
|
||||
const auto it = ctx_sampling.sequence_contexts.find(seq);
|
||||
if (it != ctx_sampling.sequence_contexts.end()) {
|
||||
return it->second;
|
||||
}
|
||||
llama_sampler_sequence_context new_ctx = {
|
||||
2.0f * ctx_sampling.params.mirostat_tau,
|
||||
ctx_sampling.grammar != NULL ? llama_grammar_copy(ctx_sampling.grammar) : NULL,
|
||||
};
|
||||
return ctx_sampling.sequence_contexts.insert({seq, new_ctx}).first->second;
|
||||
}
|
||||
|
||||
bool llama_sampling_context_reset(
|
||||
llama_sampling_context & ctx_sampling,
|
||||
const llama_seq_id seq) {
|
||||
const auto it = ctx_sampling.sequence_contexts.find(seq);
|
||||
if (it == ctx_sampling.sequence_contexts.end()) return false;
|
||||
if (it->second.grammar != NULL) {
|
||||
llama_grammar_free(it->second.grammar);
|
||||
it->second.grammar = NULL;
|
||||
}
|
||||
ctx_sampling.sequence_contexts.erase(it);
|
||||
return true;
|
||||
return result;
|
||||
}
|
||||
|
||||
llama_token llama_sampling_sample(
|
||||
@@ -53,8 +17,7 @@ llama_token llama_sampling_sample(
|
||||
struct llama_sampling_context & ctx_sampling,
|
||||
const std::vector<llama_token> & last_tokens,
|
||||
std::vector<llama_token_data> & candidates,
|
||||
const int idx,
|
||||
llama_seq_id seq) {
|
||||
const int idx) {
|
||||
const int n_ctx = llama_n_ctx(ctx);
|
||||
const int n_vocab = llama_n_vocab(llama_get_model(ctx));
|
||||
|
||||
@@ -115,10 +78,8 @@ llama_token llama_sampling_sample(
|
||||
}
|
||||
}
|
||||
|
||||
llama_sampler_sequence_context & ctx_seq = llama_sampling_get_sequence_context(ctx_sampling, seq);
|
||||
|
||||
if (ctx_seq.grammar != NULL) {
|
||||
llama_sample_grammar(ctx, &cur_p, ctx_seq.grammar);
|
||||
if (ctx_sampling.grammar != NULL) {
|
||||
llama_sample_grammar(ctx, &cur_p, ctx_sampling.grammar);
|
||||
}
|
||||
|
||||
if (temp <= 0) {
|
||||
@@ -128,10 +89,10 @@ llama_token llama_sampling_sample(
|
||||
if (mirostat == 1) {
|
||||
const int mirostat_m = 100;
|
||||
llama_sample_temp(ctx, &cur_p, temp);
|
||||
id = llama_sample_token_mirostat(ctx, &cur_p, mirostat_tau, mirostat_eta, mirostat_m, &ctx_seq.mirostat_mu);
|
||||
id = llama_sample_token_mirostat(ctx, &cur_p, mirostat_tau, mirostat_eta, mirostat_m, &ctx_sampling.mirostat_mu);
|
||||
} else if (mirostat == 2) {
|
||||
llama_sample_temp(ctx, &cur_p, temp);
|
||||
id = llama_sample_token_mirostat_v2(ctx, &cur_p, mirostat_tau, mirostat_eta, &ctx_seq.mirostat_mu);
|
||||
id = llama_sample_token_mirostat_v2(ctx, &cur_p, mirostat_tau, mirostat_eta, &ctx_sampling.mirostat_mu);
|
||||
} else {
|
||||
// Temperature sampling
|
||||
size_t min_keep = std::max(1, params.n_probs);
|
||||
@@ -158,8 +119,8 @@ llama_token llama_sampling_sample(
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx_seq.grammar != NULL) {
|
||||
llama_grammar_accept_token(ctx, ctx_seq.grammar, id);
|
||||
if (ctx_sampling.grammar != NULL) {
|
||||
llama_grammar_accept_token(ctx, ctx_sampling.grammar, id);
|
||||
}
|
||||
|
||||
return id;
|
||||
|
||||
+4
-25
@@ -34,27 +34,14 @@ typedef struct llama_sampling_params {
|
||||
|
||||
} llama_sampling_params;
|
||||
|
||||
// per-sequence sampler context
|
||||
typedef struct llama_sampler_sequence_context {
|
||||
float mirostat_mu; // mirostat sampler state
|
||||
llama_grammar * grammar;
|
||||
} llama_sampler_sequence_context;
|
||||
|
||||
// general sampler context
|
||||
typedef struct llama_sampling_context {
|
||||
~llama_sampling_context();
|
||||
|
||||
// parameters that will be used for sampling and when creating
|
||||
// new llama_sampler_sequence_context instances
|
||||
// parameters that will be used for sampling
|
||||
llama_sampling_params params;
|
||||
|
||||
// map of sequence ids to sampler contexts
|
||||
std::unordered_map<llama_seq_id, llama_sampler_sequence_context> sequence_contexts;
|
||||
// mirostat sampler state
|
||||
float mirostat_mu;
|
||||
|
||||
// when non-NULL, new instances of llama_sampler_sequence_context
|
||||
// will get a copy of the grammar here
|
||||
// note: only the pointer is stored here, it is not a copy of
|
||||
// the grammar and shouldn't be freed
|
||||
llama_grammar * grammar;
|
||||
} llama_sampling_context;
|
||||
|
||||
@@ -65,13 +52,6 @@ llama_sampling_context llama_sampling_context_init(
|
||||
const struct gpt_params & params,
|
||||
llama_grammar * grammar = NULL);
|
||||
|
||||
// Fetches the sampler context for the specified sequence id (defaults to 0).
|
||||
// If the context for that sequence id doesn't already exist, it will be created with
|
||||
// default values based on the parameters in the ctx_sampling argument.
|
||||
llama_sampler_sequence_context & llama_sampling_get_sequence_context(
|
||||
llama_sampling_context & ctx_sampling,
|
||||
const llama_seq_id seq = 0);
|
||||
|
||||
// Reset the sampler context for the supplied sequence id (defaults to 0).
|
||||
// This is necessary to reuse a sequence id or free memory used by sequences
|
||||
// that are no longer required.
|
||||
@@ -104,5 +84,4 @@ llama_token llama_sampling_sample(
|
||||
struct llama_sampling_context & ctx_sampling,
|
||||
const std::vector<llama_token> & last_tokens,
|
||||
std::vector<llama_token_data> & candidates,
|
||||
const int idx = 0,
|
||||
llama_seq_id seq = 0);
|
||||
const int idx = 0);
|
||||
|
||||
+8396
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -49,7 +49,7 @@ According to the BLIS documentation, we could set the following
|
||||
environment variables to modify the behavior of openmp:
|
||||
|
||||
```bash
|
||||
export GOMP_GPU_AFFINITY="0-19"
|
||||
export GOMP_CPU_AFFINITY="0-19"
|
||||
export BLIS_NUM_THREADS=14
|
||||
```
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ else()
|
||||
add_subdirectory(speculative)
|
||||
add_subdirectory(parallel)
|
||||
add_subdirectory(embd-input)
|
||||
add_subdirectory(llava)
|
||||
add_subdirectory(llama-bench)
|
||||
add_subdirectory(beam-search)
|
||||
if (LLAMA_METAL)
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
set(TARGET clip)
|
||||
add_library(${TARGET} clip.cpp clip.h)
|
||||
install(TARGETS ${TARGET} LIBRARY)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if (NOT MSVC)
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||
endif()
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
|
||||
set(TARGET llava)
|
||||
add_executable(${TARGET} llava.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama clip ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
@@ -0,0 +1,57 @@
|
||||
# LLaVA
|
||||
|
||||
Currently this implementation supports [llava-v1.5](https://huggingface.co/liuhaotian/llava-v1.5-7b) variants.
|
||||
|
||||
The pre-converted [7b](https://huggingface.co/mys/ggml_llava-v1.5-7b)
|
||||
and [13b](https://huggingface.co/mys/ggml_llava-v1.5-13b)
|
||||
models are available.
|
||||
|
||||
After API is confirmed, more models will be supported / uploaded.
|
||||
|
||||
## Usage
|
||||
Build with cmake or run `make llava` to build it.
|
||||
|
||||
After building, run: `./llava` to see the usage. For example:
|
||||
|
||||
```sh
|
||||
./llava -m llava-v1.5-7b/ggml-model-q5_k.gguf --mmproj llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg
|
||||
```
|
||||
|
||||
**note**: A lower temperature like 0.1 is recommended for better quality. add `--temp 0.1` to the command to do so.
|
||||
|
||||
## Model conversion
|
||||
|
||||
- Clone `llava-v15-7b`` and `clip-vit-large-patch14-336`` locally:
|
||||
|
||||
```sh
|
||||
git clone https://huggingface.co/liuhaotian/llava-v1.5-7b
|
||||
|
||||
git clone https://huggingface.co/openai/clip-vit-large-patch14-336
|
||||
```
|
||||
|
||||
2. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:
|
||||
|
||||
```sh
|
||||
python ./examples/llava/llava-surgery.py -m ../llava-v1.5-7b
|
||||
```
|
||||
|
||||
3. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF:
|
||||
|
||||
```sh
|
||||
python ./examples/llava/convert-image-encoder-to-gguf -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
|
||||
```
|
||||
|
||||
4. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF:
|
||||
|
||||
```sh
|
||||
python ./convert.py ../llava-v1.5-7b
|
||||
```
|
||||
|
||||
Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` directory.
|
||||
|
||||
## TODO
|
||||
|
||||
- [ ] Support server mode.
|
||||
- [ ] Support non-CPU backend for the image encoding part.
|
||||
- [ ] Support different sampling methods.
|
||||
- [ ] Support more model variants.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,73 @@
|
||||
#ifndef CLIP_H
|
||||
#define CLIP_H
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
struct clip_ctx;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct clip_vision_hparams {
|
||||
int32_t image_size;
|
||||
int32_t patch_size;
|
||||
int32_t hidden_size;
|
||||
int32_t n_intermediate;
|
||||
int32_t projection_dim;
|
||||
int32_t n_head;
|
||||
int32_t n_layer;
|
||||
float eps;
|
||||
};
|
||||
|
||||
struct clip_ctx * clip_model_load(const char * fname, const int verbosity);
|
||||
|
||||
void clip_free(struct clip_ctx * ctx);
|
||||
|
||||
size_t clip_embd_nbytes(struct clip_ctx * ctx);
|
||||
int clip_n_patches(struct clip_ctx * ctx);
|
||||
int clip_n_mmproj_embd(struct clip_ctx * ctx);
|
||||
|
||||
// RGB uint8 image
|
||||
struct clip_image_u8 {
|
||||
int nx;
|
||||
int ny;
|
||||
uint8_t * data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
// RGB float32 image (NHWC)
|
||||
// Memory layout: RGBRGBRGB...
|
||||
struct clip_image_f32 {
|
||||
int nx;
|
||||
int ny;
|
||||
float * data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
struct clip_image_u8_batch {
|
||||
struct clip_image_u8 * data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
struct clip_image_f32_batch {
|
||||
struct clip_image_f32 * data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
struct clip_image_u8 * make_clip_image_u8();
|
||||
struct clip_image_f32 * make_clip_image_f32();
|
||||
bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
|
||||
bool clip_image_preprocess(const struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32 * res, const bool pad2square);
|
||||
bool clip_image_encode(const struct clip_ctx * ctx, const int n_threads, struct clip_image_f32 * img, float * vec);
|
||||
|
||||
bool clip_image_batch_encode(const struct clip_ctx * ctx, const int n_threads, const struct clip_image_f32_batch * imgs,
|
||||
float * vec);
|
||||
|
||||
bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // CLIP_H
|
||||
@@ -0,0 +1,250 @@
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
from gguf import *
|
||||
from transformers import CLIPModel, CLIPProcessor
|
||||
|
||||
TEXT = "clip.text"
|
||||
VISION = "clip.vision"
|
||||
|
||||
|
||||
def k(raw_key: str, arch: str) -> str:
|
||||
return raw_key.format(arch=arch)
|
||||
|
||||
|
||||
def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: bool) -> bool:
|
||||
if name in (
|
||||
"logit_scale",
|
||||
"text_model.embeddings.position_ids",
|
||||
"vision_model.embeddings.position_ids",
|
||||
):
|
||||
return True
|
||||
|
||||
if has_llava and name in ["visual_projection.weight", "vision_model.post_layernorm.weight", "vision_model.post_layernorm.bias"]:
|
||||
return True
|
||||
|
||||
if name.startswith("v") and not has_vision:
|
||||
return True
|
||||
|
||||
if name.startswith("t") and not has_text:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_tensor_name(name: str) -> str:
|
||||
if "projection" in name:
|
||||
return name
|
||||
|
||||
if "mm_projector" in name:
|
||||
return name.replace("model.mm_projector", "mm")
|
||||
|
||||
return name.replace("text_model", "t").replace("vision_model", "v").replace("encoder.layers", "blk").replace("embeddings.", "").replace("_proj", "").replace("self_attn.", "attn_").replace("layer_norm", "ln").replace("layernorm", "ln").replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("embedding", "embd").replace("final", "post").replace("layrnorm", "ln")
|
||||
|
||||
|
||||
def bytes_to_unicode():
|
||||
"""
|
||||
Returns list of utf-8 byte and a corresponding list of unicode strings.
|
||||
The reversible bpe codes work on unicode strings.
|
||||
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
|
||||
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
|
||||
This is a signficant percentage of your normal, say, 32K bpe vocab.
|
||||
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
|
||||
And avoids mapping to whitespace/control characters the bpe code barfs on.
|
||||
"""
|
||||
bs = (
|
||||
list(range(ord("!"), ord("~") + 1))
|
||||
+ list(range(ord("¡"), ord("¬") + 1))
|
||||
+ list(range(ord("®"), ord("ÿ") + 1))
|
||||
)
|
||||
cs = bs[:]
|
||||
n = 0
|
||||
for b in range(2**8):
|
||||
if b not in bs:
|
||||
bs.append(b)
|
||||
cs.append(2**8 + n)
|
||||
n += 1
|
||||
cs = [chr(n) for n in cs]
|
||||
return dict(zip(bs, cs))
|
||||
|
||||
|
||||
ap = argparse.ArgumentParser(prog="convert_hf_to_gguf.py")
|
||||
ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True)
|
||||
ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16")
|
||||
ap.add_argument("--text-only", action="store_true", required=False,
|
||||
help="Save a text-only model. It can't be used to encode images")
|
||||
ap.add_argument("--vision-only", action="store_true", required=False,
|
||||
help="Save a vision-only model. It can't be used to encode texts")
|
||||
ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
|
||||
ap.add_argument("--image-mean", nargs=3, type=float, required=False, help="Override image mean values")
|
||||
ap.add_argument("--image-std", nargs=3, type=float, required=False, help="Override image std values")
|
||||
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
|
||||
|
||||
args = ap.parse_args()
|
||||
|
||||
|
||||
if args.text_only and args.vision_only:
|
||||
print("--text-only and --image-only arguments cannot be specified at the same time.")
|
||||
exit(1)
|
||||
|
||||
if args.use_f32:
|
||||
print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.")
|
||||
|
||||
# output in the same directory as the model if output_dir is None
|
||||
dir_model = args.model_dir
|
||||
|
||||
|
||||
with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
|
||||
vocab = json.load(f)
|
||||
tokens = [key for key in vocab]
|
||||
|
||||
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
|
||||
config = json.load(f)
|
||||
v_hparams = config["vision_config"]
|
||||
t_hparams = config["text_config"]
|
||||
|
||||
# possible data types
|
||||
# ftype == 0 -> float32
|
||||
# ftype == 1 -> float16
|
||||
#
|
||||
# map from ftype to string
|
||||
ftype_str = ["f32", "f16"]
|
||||
|
||||
ftype = 1
|
||||
if args.use_f32:
|
||||
ftype = 0
|
||||
|
||||
|
||||
model = CLIPModel.from_pretrained(dir_model)
|
||||
processor = CLIPProcessor.from_pretrained(dir_model)
|
||||
|
||||
fname_middle = None
|
||||
has_text_encoder = True
|
||||
has_vision_encoder = True
|
||||
has_llava_projector = False
|
||||
if args.text_only:
|
||||
fname_middle = "text-"
|
||||
has_vision_encoder = False
|
||||
elif args.vision_only:
|
||||
fname_middle = "vision-"
|
||||
has_text_encoder = False
|
||||
elif args.llava_projector is not None:
|
||||
fname_middle = "mmproj-"
|
||||
has_text_encoder = False
|
||||
has_llava_projector = True
|
||||
else:
|
||||
fname_middle = ""
|
||||
|
||||
output_dir = args.output_dir if args.output_dir is not None else dir_model
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
output_prefix = os.path.basename(output_dir).replace("ggml_", "")
|
||||
fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf")
|
||||
fout = GGUFWriter(path=fname_out, arch="clip")
|
||||
|
||||
fout.add_bool("clip.has_text_encoder", has_text_encoder)
|
||||
fout.add_bool("clip.has_vision_encoder", has_vision_encoder)
|
||||
fout.add_bool("clip.has_llava_projector", has_llava_projector)
|
||||
fout.add_file_type(ftype)
|
||||
model_name = config["_name_or_path"] if "_name_or_path" in config else os.path.basename(dir_model)
|
||||
fout.add_name(model_name)
|
||||
if args.text_only:
|
||||
fout.add_description("text-only CLIP model")
|
||||
elif args.vision_only and not has_llava_projector:
|
||||
fout.add_description("vision-only CLIP model")
|
||||
elif has_llava_projector:
|
||||
fout.add_description("image encoder for LLaVA")
|
||||
else:
|
||||
fout.add_description("two-tower CLIP model")
|
||||
|
||||
if has_text_encoder:
|
||||
# text_model hparams
|
||||
fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"])
|
||||
fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"])
|
||||
fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), t_hparams["intermediate_size"])
|
||||
fout.add_uint32("clip.text.projection_dim", t_hparams.get("projection_dim", config["projection_dim"]))
|
||||
fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), t_hparams["num_attention_heads"])
|
||||
fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), t_hparams["layer_norm_eps"])
|
||||
fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), t_hparams["num_hidden_layers"])
|
||||
fout.add_token_list(tokens)
|
||||
|
||||
if has_vision_encoder:
|
||||
# vision_model hparams
|
||||
fout.add_uint32("clip.vision.image_size", v_hparams["image_size"])
|
||||
fout.add_uint32("clip.vision.patch_size", v_hparams["patch_size"])
|
||||
fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), v_hparams["hidden_size"])
|
||||
fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), v_hparams["intermediate_size"])
|
||||
fout.add_uint32("clip.vision.projection_dim", v_hparams.get("projection_dim", config["projection_dim"]))
|
||||
fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"])
|
||||
fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), v_hparams["layer_norm_eps"])
|
||||
block_count = v_hparams["num_hidden_layers"] - 1 if has_llava_projector else v_hparams["num_hidden_layers"]
|
||||
fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), block_count)
|
||||
|
||||
image_mean = processor.image_processor.image_mean if args.image_mean is None else args.image_mean
|
||||
image_std = processor.image_processor.image_std if args.image_std is None else args.image_std
|
||||
fout.add_array("clip.vision.image_mean", image_mean)
|
||||
fout.add_array("clip.vision.image_std", image_std)
|
||||
|
||||
use_gelu = v_hparams["hidden_act"] == "gelu"
|
||||
fout.add_bool("clip.use_gelu", use_gelu)
|
||||
|
||||
|
||||
if has_llava_projector:
|
||||
model.vision_model.encoder.layers.pop(-1)
|
||||
projector = torch.load(args.llava_projector)
|
||||
for name, data in projector.items():
|
||||
name = get_tensor_name(name)
|
||||
if data.ndim == 2:
|
||||
data = data.squeeze().numpy().astype(np.float16)
|
||||
else:
|
||||
data = data.squeeze().numpy().astype(np.float32)
|
||||
|
||||
fout.add_tensor(name, data)
|
||||
|
||||
print("Projector tensors added\n")
|
||||
|
||||
state_dict = model.state_dict()
|
||||
for name, data in state_dict.items():
|
||||
if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_llava_projector):
|
||||
# we don't need this
|
||||
print(f"skipping parameter: {name}")
|
||||
continue
|
||||
|
||||
name = get_tensor_name(name)
|
||||
data = data.squeeze().numpy()
|
||||
|
||||
n_dims = len(data.shape)
|
||||
|
||||
# ftype == 0 -> float32, ftype == 1 -> float16
|
||||
ftype_cur = 0
|
||||
if n_dims == 4:
|
||||
print(f"tensor {name} is always saved in f16")
|
||||
data = data.astype(np.float16)
|
||||
ftype_cur = 1
|
||||
elif ftype == 1:
|
||||
if name[-7:] == ".weight" and n_dims == 2:
|
||||
print(" Converting to float16")
|
||||
data = data.astype(np.float16)
|
||||
ftype_cur = 1
|
||||
else:
|
||||
print(" Converting to float32")
|
||||
data = data.astype(np.float32)
|
||||
ftype_cur = 0
|
||||
else:
|
||||
if data.dtype != np.float32:
|
||||
print(" Converting to float32")
|
||||
data = data.astype(np.float32)
|
||||
ftype_cur = 0
|
||||
|
||||
print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}")
|
||||
fout.add_tensor(name, data)
|
||||
|
||||
|
||||
fout.write_header_to_file()
|
||||
fout.write_kv_data_to_file()
|
||||
fout.write_tensors_to_file()
|
||||
fout.close()
|
||||
|
||||
print("Done. Output file: " + fname_out)
|
||||
@@ -0,0 +1,30 @@
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import torch
|
||||
|
||||
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("-m", "--model", help="Path to LLaVA v1.5 model")
|
||||
args = ap.parse_args()
|
||||
|
||||
# find the model part that includes the the multimodal projector weights
|
||||
path = sorted(glob.glob(f"{args.model}/pytorch_model*.bin"))[-1]
|
||||
checkpoint = torch.load(path)
|
||||
|
||||
# get a list of mm tensor names
|
||||
mm_tensors = [k for k, v in checkpoint.items() if k.startswith("model.mm_projector")]
|
||||
|
||||
# store these tensors in a new dictionary and torch.save them
|
||||
projector = {name: checkpoint[name] for name in mm_tensors}
|
||||
torch.save(projector, f"{args.model}/llava.projector")
|
||||
|
||||
# remove these tensors from the checkpoint and save it again
|
||||
for name in mm_tensors:
|
||||
del checkpoint[name]
|
||||
|
||||
torch.save(checkpoint, path)
|
||||
|
||||
print("Done!")
|
||||
print(f"Now you can convert {args.model} to a a regular LLaMA GGUF file.")
|
||||
print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.")
|
||||
@@ -0,0 +1,145 @@
|
||||
#pragma once
|
||||
|
||||
// this one and clip lib will be eventually merged to a single lib, let's keep it this way for now
|
||||
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
inline bool eval_image_embd(llama_context * ctx_llama, float * embd, int N, int n_batch, int * n_past) {
|
||||
int n_embd = llama_n_embd(llama_get_model(ctx_llama));
|
||||
|
||||
for (int i = 0; i < N; i += n_batch) {
|
||||
int n_eval = N - i;
|
||||
if (n_eval > n_batch) {
|
||||
n_eval = n_batch;
|
||||
}
|
||||
llama_batch batch = {int32_t(n_eval), nullptr, (embd+i*n_embd), nullptr, nullptr, nullptr, *n_past, 1, 0, };
|
||||
if (llama_decode(ctx_llama, batch)) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return false;
|
||||
}
|
||||
*n_past += n_eval;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
|
||||
int N = (int) tokens.size();
|
||||
for (int i = 0; i < N; i += n_batch) {
|
||||
int n_eval = (int) tokens.size() - i;
|
||||
if (n_eval > n_batch) {
|
||||
n_eval = n_batch;
|
||||
}
|
||||
if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return false;
|
||||
}
|
||||
*n_past += n_eval;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
|
||||
std::vector<llama_token> tokens;
|
||||
tokens.push_back(id);
|
||||
return eval_tokens(ctx_llama, tokens, 1, n_past);
|
||||
}
|
||||
|
||||
inline bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past){
|
||||
std::string str2 = str;
|
||||
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, true);
|
||||
eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: use common/sampling.h
|
||||
inline llama_token sample_id(llama_context * ctx_llama, gpt_params & params) {
|
||||
// out of user input, sample next token
|
||||
const float temp = params.sampling_params.temp;
|
||||
const int32_t top_k = params.sampling_params.top_k <= 0 ? llama_n_vocab(llama_get_model(ctx_llama)) : params.sampling_params.top_k;
|
||||
const float top_p = params.sampling_params.top_p;
|
||||
const float tfs_z = params.sampling_params.tfs_z;
|
||||
const float typical_p = params.sampling_params.typical_p;
|
||||
// const int32_t repeat_last_n = params.sampling_params.repeat_last_n < 0 ? n_ctx : params.sampling_params.repeat_last_n;
|
||||
// const float repeat_penalty = params.sampling_params.repeat_penalty;
|
||||
// const float alpha_presence = params.sampling_params.presence_penalty;
|
||||
// const float alpha_frequency = params.sampling_params.frequency_penalty;
|
||||
const int mirostat = params.sampling_params.mirostat;
|
||||
const float mirostat_tau = params.sampling_params.mirostat_tau;
|
||||
const float mirostat_eta = params.sampling_params.mirostat_eta;
|
||||
// const bool penalize_nl = params.sampling_params.penalize_nl;
|
||||
|
||||
llama_token id = 0;
|
||||
{
|
||||
auto logits = llama_get_logits(ctx_llama);
|
||||
auto n_vocab = llama_n_vocab(llama_get_model(ctx_llama));
|
||||
|
||||
// Apply params.logit_bias map
|
||||
for (auto it = params.sampling_params.logit_bias.begin(); it != params.sampling_params.logit_bias.end(); it++) {
|
||||
logits[it->first] += it->second;
|
||||
}
|
||||
|
||||
std::vector<llama_token_data> candidates;
|
||||
candidates.reserve(n_vocab);
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
||||
|
||||
// TODO: Apply penalties
|
||||
// float nl_logit = logits[llama_token_nl(ctx)];
|
||||
// auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
|
||||
// llama_sample_repetition_penalty(ctx, &candidates_p,
|
||||
// last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
// last_n_repeat, repeat_penalty);
|
||||
// llama_sample_frequency_and_presence_penalties(ctx, &candidates_p,
|
||||
// last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
// last_n_repeat, alpha_frequency, alpha_presence);
|
||||
// if (!penalize_nl) {
|
||||
// logits[llama_token_nl(ctx)] = nl_logit;
|
||||
// }
|
||||
|
||||
if (temp <= 0) {
|
||||
// Greedy sampling
|
||||
id = llama_sample_token_greedy(ctx_llama, &candidates_p);
|
||||
} else {
|
||||
if (mirostat == 1) {
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
const int mirostat_m = 100;
|
||||
llama_sample_temp(ctx_llama, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat(ctx_llama, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
|
||||
} else if (mirostat == 2) {
|
||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||
llama_sample_temp(ctx_llama, &candidates_p, temp);
|
||||
id = llama_sample_token_mirostat_v2(ctx_llama, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
||||
} else {
|
||||
// Temperature sampling
|
||||
llama_sample_top_k(ctx_llama, &candidates_p, top_k, 1);
|
||||
llama_sample_tail_free(ctx_llama, &candidates_p, tfs_z, 1);
|
||||
llama_sample_typical(ctx_llama, &candidates_p, typical_p, 1);
|
||||
llama_sample_top_p(ctx_llama, &candidates_p, top_p, 1);
|
||||
llama_sample_temp(ctx_llama, &candidates_p, temp);
|
||||
id = llama_sample_token(ctx_llama, &candidates_p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
inline const char * sample(struct llama_context * ctx_llama, gpt_params & params, int * n_past) {
|
||||
int id = sample_id(ctx_llama, params);
|
||||
static std::string ret;
|
||||
if (id == llama_token_eos(ctx_llama)) {
|
||||
ret = "</s>";
|
||||
} else {
|
||||
ret = llama_token_to_piece(ctx_llama, id);
|
||||
}
|
||||
eval_id(ctx_llama, id, n_past);
|
||||
return ret.c_str();
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
#include "clip.h"
|
||||
#include "llava-utils.h"
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
static void show_additional_info(int /*argc*/, char ** argv) {
|
||||
printf("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
|
||||
printf(" note: a lower temperature value like 0.1 is recommended for better quality.\n");
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
|
||||
gpt_params params;
|
||||
|
||||
if (!gpt_params_parse(argc, argv, params)) {
|
||||
show_additional_info(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (params.mmproj.empty() || params.image.empty()) {
|
||||
gpt_print_usage(argc, argv, params);
|
||||
show_additional_info(argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char * clip_path = params.mmproj.c_str();
|
||||
const char * img_path = params.image.c_str();
|
||||
|
||||
if (params.prompt.empty()) {
|
||||
params.prompt = "describe the image in detail.";
|
||||
}
|
||||
|
||||
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
|
||||
|
||||
// load and preprocess the image
|
||||
clip_image_u8 img;
|
||||
clip_image_f32 img_res;
|
||||
|
||||
if (!clip_image_load_from_file(img_path, &img)) {
|
||||
fprintf(stderr, "%s: is %s really an image file?\n", __func__, img_path);
|
||||
|
||||
clip_free(ctx_clip);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!clip_image_preprocess(ctx_clip, &img, &img_res, /*pad2square =*/ true)) {
|
||||
fprintf(stderr, "%s: unable to preprocess %s\n", __func__, img_path);
|
||||
|
||||
clip_free(ctx_clip);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int n_img_pos = clip_n_patches(ctx_clip);
|
||||
int n_img_embd = clip_n_mmproj_embd(ctx_clip);
|
||||
|
||||
float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip));
|
||||
|
||||
if (!image_embd) {
|
||||
fprintf(stderr, "Unable to allocate memory for image embeddings\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
const int64_t t_img_enc_start_us = ggml_time_us();
|
||||
if (!clip_image_encode(ctx_clip, params.n_threads, &img_res, image_embd)) {
|
||||
fprintf(stderr, "Unable to encode image\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
const int64_t t_img_enc_end_us = ggml_time_us();
|
||||
|
||||
// we get the embeddings, free up the memory required for CLIP
|
||||
clip_free(ctx_clip);
|
||||
|
||||
llama_backend_init(params.numa);
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||
if (model == NULL) {
|
||||
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
llama_context_params ctx_params = llama_context_default_params();
|
||||
|
||||
ctx_params.n_ctx = params.n_ctx < 2048 ? 2048 : params.n_ctx; // we need a longer context size to process image embeddings
|
||||
ctx_params.n_threads = params.n_threads;
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
if (ctx_llama == NULL) {
|
||||
fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// make sure that the correct mmproj was used, i.e., compare apples to apples
|
||||
int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama));
|
||||
if (n_img_embd != n_llama_embd) {
|
||||
printf("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_img_embd, n_llama_embd);
|
||||
|
||||
llama_free(ctx_llama);
|
||||
llama_free_model(model);
|
||||
llama_backend_free();
|
||||
free(image_embd);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// process the prompt
|
||||
// llava chat format is "<system_prompt>USER: <image_embeddings>\n<textual_prompt>\nASSISTANT:"
|
||||
|
||||
int n_past = 0;
|
||||
|
||||
const int max_tgt_len = params.n_predict < 0 ? 256 : params.n_predict;
|
||||
|
||||
// GG: are we sure that the should be a trailing whitespace at the end of this string?
|
||||
eval_string(ctx_llama, "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER: ", params.n_batch, &n_past);
|
||||
eval_image_embd(ctx_llama, image_embd, n_img_pos, params.n_batch, &n_past);
|
||||
eval_string(ctx_llama, params.prompt.c_str(), params.n_batch, &n_past);
|
||||
eval_string(ctx_llama, "\nASSISTANT:", params.n_batch, &n_past);
|
||||
|
||||
// generate the response
|
||||
|
||||
printf("\n");
|
||||
|
||||
for (int i = 0; i < max_tgt_len; i++) {
|
||||
const char * tmp = sample(ctx_llama, params, &n_past);
|
||||
if (strcmp(tmp, "</s>") == 0) break;
|
||||
|
||||
printf("%s", tmp);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
{
|
||||
const float t_img_enc_ms = (t_img_enc_end_us - t_img_enc_start_us) / 1000.0;
|
||||
|
||||
printf("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / n_img_pos);
|
||||
}
|
||||
|
||||
llama_print_timings(ctx_llama);
|
||||
|
||||
llama_free(ctx_llama);
|
||||
llama_free_model(model);
|
||||
llama_backend_free();
|
||||
free(image_embd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -69,6 +69,8 @@ struct client {
|
||||
std::string response;
|
||||
|
||||
std::vector<llama_token> tokens_prev;
|
||||
|
||||
llama_sampling_context ctx_sampling;
|
||||
};
|
||||
|
||||
static void print_date_time() {
|
||||
@@ -125,8 +127,6 @@ int main(int argc, char ** argv) {
|
||||
params.logits_all = true;
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
|
||||
llama_sampling_context ctx_sampling = llama_sampling_context_init(params, NULL);
|
||||
|
||||
// load the prompts from an external file if there are any
|
||||
if (params.prompt.empty()) {
|
||||
printf("\n\033[32mNo new questions so proceed with build-in defaults.\033[0m\n");
|
||||
@@ -156,6 +156,7 @@ int main(int argc, char ** argv) {
|
||||
client.id = i;
|
||||
client.tokens_prev.resize(std::max(256, params.n_predict));
|
||||
std::fill(client.tokens_prev.begin(), client.tokens_prev.end(), 0);
|
||||
client.ctx_sampling = llama_sampling_context_init(params, NULL);
|
||||
}
|
||||
|
||||
std::vector<llama_token_data> candidates;
|
||||
@@ -341,7 +342,7 @@ int main(int argc, char ** argv) {
|
||||
//printf("client %d, seq %d, token %d, pos %d, batch %d\n",
|
||||
// client.id, client.seq_id, client.sampled, client.n_decoded, client.i_batch);
|
||||
|
||||
const llama_token id = llama_sampling_sample(ctx, NULL, ctx_sampling, client.tokens_prev, candidates, client.i_batch - i, client.seq_id);
|
||||
const llama_token id = llama_sampling_sample(ctx, NULL, client.ctx_sampling, client.tokens_prev, candidates, client.i_batch - i);
|
||||
|
||||
if (client.n_decoded == 1) {
|
||||
// start measuring generation time after the first token to make sure all concurrent clients
|
||||
@@ -386,7 +387,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
n_total_prompt += client.n_prompt;
|
||||
n_total_gen += client.n_decoded;
|
||||
llama_sampling_context_reset(ctx_sampling, client.seq_id);
|
||||
|
||||
client.seq_id = -1;
|
||||
}
|
||||
|
||||
|
||||
+2184
-2003
File diff suppressed because it is too large
Load Diff
@@ -136,6 +136,11 @@
|
||||
display: block;
|
||||
}
|
||||
|
||||
fieldset label.slim {
|
||||
margin: 0 0.5em;
|
||||
display: inline;
|
||||
}
|
||||
|
||||
header, footer {
|
||||
text-align: center;
|
||||
}
|
||||
@@ -145,6 +150,14 @@
|
||||
color: #888;
|
||||
}
|
||||
|
||||
.mode-chat textarea[name=prompt] {
|
||||
height: 4.5em;
|
||||
}
|
||||
|
||||
.mode-completion textarea[name=prompt] {
|
||||
height: 10em;
|
||||
}
|
||||
|
||||
|
||||
@keyframes loading-bg-wipe {
|
||||
0% {
|
||||
@@ -187,7 +200,7 @@
|
||||
template: "{{prompt}}\n\n{{history}}\n{{char}}:",
|
||||
historyTemplate: "{{name}}: {{message}}",
|
||||
transcript: [],
|
||||
type: "chat",
|
||||
type: "chat", // "chat" | "completion"
|
||||
char: "Llama",
|
||||
user: "User",
|
||||
})
|
||||
@@ -365,13 +378,44 @@
|
||||
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
|
||||
}
|
||||
|
||||
async function runLlama(prompt, llamaParams, char) {
|
||||
const currentMessages = [];
|
||||
const history = session.value.transcript;
|
||||
if (controller.value) {
|
||||
throw new Error("already running");
|
||||
}
|
||||
controller.value = new AbortController();
|
||||
for await (const chunk of llama(prompt, llamaParams, {controller: controller.value})) {
|
||||
const data = chunk.data;
|
||||
|
||||
if (data.stop) {
|
||||
while (
|
||||
currentMessages.length > 0 &&
|
||||
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
|
||||
) {
|
||||
currentMessages.pop();
|
||||
}
|
||||
transcriptUpdate([...history, [char, currentMessages]])
|
||||
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
|
||||
} else {
|
||||
currentMessages.push(data);
|
||||
transcriptUpdate([...history, [char, currentMessages]])
|
||||
}
|
||||
|
||||
if (data.timings) {
|
||||
llamaStats.value = data.timings;
|
||||
}
|
||||
}
|
||||
|
||||
controller.value = null;
|
||||
}
|
||||
|
||||
// send message to server
|
||||
const chat = async (msg) => {
|
||||
if (controller.value) {
|
||||
console.log('already running...');
|
||||
return;
|
||||
}
|
||||
controller.value = new AbortController();
|
||||
|
||||
transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
|
||||
|
||||
@@ -391,55 +435,41 @@
|
||||
).join("\n"),
|
||||
});
|
||||
|
||||
const currentMessages = [];
|
||||
const history = session.value.transcript
|
||||
|
||||
const llamaParams = {
|
||||
await runLlama(prompt, {
|
||||
...params.value,
|
||||
stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
|
||||
}, "{{char}}");
|
||||
}
|
||||
|
||||
const runCompletion = async () => {
|
||||
if (controller.value) {
|
||||
console.log('already running...');
|
||||
return;
|
||||
}
|
||||
const {prompt} = session.value;
|
||||
transcriptUpdate([...session.value.transcript, ["", prompt]]);
|
||||
await runLlama(prompt, {
|
||||
...params.value,
|
||||
stop: [],
|
||||
}, "");
|
||||
}
|
||||
|
||||
for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
|
||||
const data = chunk.data;
|
||||
|
||||
if (data.stop) {
|
||||
while (
|
||||
currentMessages.length > 0 &&
|
||||
currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
|
||||
) {
|
||||
currentMessages.pop();
|
||||
}
|
||||
transcriptUpdate([...history, ["{{char}}", currentMessages]])
|
||||
console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
|
||||
} else {
|
||||
currentMessages.push(data);
|
||||
transcriptUpdate([...history, ["{{char}}", currentMessages]])
|
||||
}
|
||||
|
||||
if (data.timings) {
|
||||
llamaStats.value = data.timings;
|
||||
}
|
||||
const stop = (e) => {
|
||||
e.preventDefault();
|
||||
if (controller.value) {
|
||||
controller.value.abort();
|
||||
controller.value = null;
|
||||
}
|
||||
}
|
||||
|
||||
controller.value = null;
|
||||
const reset = (e) => {
|
||||
stop(e);
|
||||
transcriptUpdate([]);
|
||||
}
|
||||
|
||||
function MessageInput() {
|
||||
const message = useSignal("")
|
||||
|
||||
const stop = (e) => {
|
||||
e.preventDefault();
|
||||
if (controller.value) {
|
||||
controller.value.abort();
|
||||
controller.value = null;
|
||||
}
|
||||
}
|
||||
|
||||
const reset = (e) => {
|
||||
stop(e);
|
||||
transcriptUpdate([]);
|
||||
}
|
||||
|
||||
const submit = (e) => {
|
||||
stop(e);
|
||||
chat(message.value);
|
||||
@@ -474,6 +504,19 @@
|
||||
`
|
||||
}
|
||||
|
||||
function CompletionControls() {
|
||||
const submit = (e) => {
|
||||
stop(e);
|
||||
runCompletion();
|
||||
}
|
||||
return html`
|
||||
<div>
|
||||
<button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
|
||||
<button onclick=${stop} disabled=${!generating.value}>Stop</button>
|
||||
<button onclick=${reset}>Reset</button>
|
||||
</div>`;
|
||||
}
|
||||
|
||||
const ChatLog = (props) => {
|
||||
const messages = session.value.transcript;
|
||||
const container = useRef(null)
|
||||
@@ -497,7 +540,11 @@
|
||||
data;
|
||||
message = html`<${Markdownish} text=${template(text)} />`
|
||||
}
|
||||
return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
|
||||
if(user) {
|
||||
return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
|
||||
} else {
|
||||
return html`<p key=${index}>${message}</p>`
|
||||
}
|
||||
};
|
||||
|
||||
return html`
|
||||
@@ -574,18 +621,31 @@
|
||||
userTemplateAutosave()
|
||||
}, [session.value, params.value])
|
||||
|
||||
return html`
|
||||
<form>
|
||||
<fieldset>
|
||||
<${UserTemplateResetButton}/>
|
||||
</fieldset>
|
||||
const GrammarControl = () => (
|
||||
html`
|
||||
<div>
|
||||
<label for="template">Grammar</label>
|
||||
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
|
||||
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
|
||||
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
|
||||
</div>
|
||||
`
|
||||
);
|
||||
|
||||
<fieldset>
|
||||
<div>
|
||||
<label for="prompt">Prompt</label>
|
||||
<textarea type="text" name="prompt" value="${session.value.prompt}" rows=4 oninput=${updateSession}/>
|
||||
</div>
|
||||
</fieldset>
|
||||
const PromptControlFieldSet = () => (
|
||||
html`
|
||||
<fieldset>
|
||||
<div>
|
||||
<label htmlFor="prompt">Prompt</label>
|
||||
<textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
|
||||
</div>
|
||||
</fieldset>
|
||||
`
|
||||
);
|
||||
|
||||
const ChatConfigForm = () => (
|
||||
html`
|
||||
${PromptControlFieldSet()}
|
||||
|
||||
<fieldset class="two">
|
||||
<div>
|
||||
@@ -609,15 +669,30 @@
|
||||
<label for="template">Chat history template</label>
|
||||
<textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
|
||||
</div>
|
||||
${GrammarControl()}
|
||||
</fieldset>
|
||||
`
|
||||
);
|
||||
|
||||
const CompletionConfigForm = () => (
|
||||
html`
|
||||
${PromptControlFieldSet()}
|
||||
<fieldset>${GrammarControl()}</fieldset>
|
||||
`
|
||||
);
|
||||
|
||||
return html`
|
||||
<form>
|
||||
<fieldset class="two">
|
||||
<${UserTemplateResetButton}/>
|
||||
<div>
|
||||
<label for="template">Grammar</label>
|
||||
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
|
||||
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
|
||||
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
|
||||
<label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
|
||||
<label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
|
||||
|
||||
<fieldset class="two">
|
||||
${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})}
|
||||
${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})}
|
||||
@@ -851,7 +926,7 @@
|
||||
function App(props) {
|
||||
|
||||
return html`
|
||||
<div>
|
||||
<div class="mode-${session.value.type}">
|
||||
<header>
|
||||
<h1>llama.cpp</h1>
|
||||
</header>
|
||||
@@ -861,7 +936,7 @@
|
||||
</main>
|
||||
|
||||
<section id="write">
|
||||
<${MessageInput} />
|
||||
<${session.value.type === 'chat' ? MessageInput : CompletionControls} />
|
||||
</section>
|
||||
|
||||
<footer>
|
||||
|
||||
@@ -405,6 +405,7 @@ struct llama_server_context
|
||||
// compare the evaluated prompt with the new prompt
|
||||
n_past = common_part(embd, prompt_tokens);
|
||||
embd = prompt_tokens;
|
||||
|
||||
if (n_past == num_prompt_tokens)
|
||||
{
|
||||
// we have to evaluate at least 1 token to generate logits.
|
||||
@@ -412,6 +413,9 @@ struct llama_server_context
|
||||
n_past--;
|
||||
}
|
||||
|
||||
// since #3228 we now have to manually manage the KV cache
|
||||
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||
|
||||
LOG_VERBOSE("prompt ingested", {
|
||||
{"n_past", n_past},
|
||||
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
|
||||
@@ -461,9 +465,6 @@ struct llama_server_context
|
||||
// compare the evaluated prompt with the new prompt
|
||||
n_past = common_part(embd, prompt_tokens);
|
||||
|
||||
// since #3228 we now have to manually manage the KV cache
|
||||
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||
|
||||
embd = prompt_tokens;
|
||||
if (n_past == num_prompt_tokens)
|
||||
{
|
||||
@@ -471,6 +472,9 @@ struct llama_server_context
|
||||
n_past--;
|
||||
}
|
||||
|
||||
// since #3228 we now have to manually manage the KV cache
|
||||
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||
|
||||
LOG_VERBOSE("prompt ingested", {
|
||||
{"n_past", n_past},
|
||||
{"cached", tokens_to_str(ctx, embd.cbegin(), embd.cbegin() + n_past)},
|
||||
|
||||
@@ -9,6 +9,12 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
struct seq_draft {
|
||||
std::vector<llama_token> tokens;
|
||||
|
||||
struct llama_grammar * grammar = NULL;
|
||||
};
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
|
||||
@@ -213,13 +219,8 @@ int main(int argc, char ** argv) {
|
||||
if (grammar_dft) {
|
||||
llama_grammar_free(grammar_dft);
|
||||
}
|
||||
// Note: Hardcoded to sequence id 0, if this ever supports parallel generation
|
||||
// that will need to change.
|
||||
auto it = ctx_sampling.sequence_contexts.find(0);
|
||||
GGML_ASSERT(it != ctx_sampling.sequence_contexts.end());
|
||||
// This is necessary because each sequence id in sequence_contexts
|
||||
// uses a copy of the original grammar.
|
||||
grammar_dft = llama_grammar_copy(it->second.grammar);
|
||||
|
||||
grammar_dft = llama_grammar_copy(ctx_sampling.grammar);
|
||||
|
||||
LOG("copied target grammar to draft grammar\n");
|
||||
}
|
||||
|
||||
@@ -14428,7 +14428,7 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
For each kanji character, write a Markdown‐formatted mnemonic that uses its keyword and the keyword of all its components.
|
||||
|
||||
Kanji: 欠 (lack of)
|
||||
Components: 𠂊 (hook claw), 人 (person)
|
||||
Mnemonic: This **person** is a pirate. He lost his hand to a crocodile many years ago. Nowadays, the ***lack of*** a hand does not bother him too much. In fact, the **hook claw** that replaces it is the mark of a true pirate, so he is quite proud of it!
|
||||
|
||||
Kanji: 類 (kind (of something))
|
||||
Components: 米 (rice), 大 (large), 頁 (page)
|
||||
Mnemonic: The waiter at a Chinese restaurant hands you a **large** menu. Each **page** has all ***kinds*** of **rice** on offer!
|
||||
|
||||
Kanji: 燃 (burn)
|
||||
Components: 火 (fire), 然 (sort of thing)
|
||||
Mnemonic: ***Burning*** things up with **fire** is just my **sort of thing**. (Spoken like a true pyromaniac.)
|
||||
|
||||
Kanji: 頂 (top of)
|
||||
Components: 丁 (street), 頁 (page)
|
||||
Mnemonic: To be at the ***top of*** your game, you need both practical knowledge (**street** smarts) and theoretical knowledge (having read many **pages**).
|
||||
|
||||
Kanji: 険 (risky and steep)
|
||||
Components: 阝 (small village), 㑒 (consensus)
|
||||
Mnemonic: Everyone agrees (there is **consensus**) that the path to the **small village** is ***risky and steep***.
|
||||
|
||||
Kanji: 困 (distressed)
|
||||
Components: 囗 (closed box), 木 (tree)
|
||||
Mnemonic: You would feel ***distressed*** too if you were a **tree** trapped in a **closed box**! I have no place to grow!
|
||||
|
||||
Kanji: 頭 (head)
|
||||
Components: 豆 (bean), 頁 (page)
|
||||
Mnemonic: What do you have in that ***head*** of yours? A **bean** for a brain? Go read more **pages** and become more knowledgeable about the world!
|
||||
|
||||
Kanji: 確 (certain)
|
||||
Components: 石 (stone), 冖 (roof without a chimney), 隹 (old bird)
|
||||
Mnemonic: An **old bird** has made a nest on your **roof**. What do you do? You call Misaka from a <cite>A ***Certain*** Scientific Railgun</cite> to get rid of it, of course! But she doesn’t really want to vaporize the poor thing, so she just throws a **stone** to scare it away. (What was the point of calling her, then‽)
|
||||
|
||||
Kanji: 魚 (fish)
|
||||
Components: 𠂊 (hook claw), 田 (rice field), 灬 (fire sparks)
|
||||
Mnemonic: Catch ***fish*** with a **hook**, collect rice from the **rice field**, cook them with **fire**… And my meal is ready!
|
||||
|
||||
Kanji: 警 (to police (something))
|
||||
Components: 敬 (respect), 言 (say)
|
||||
Mnemonic: ***To police something*** is to make people **respect** what the law **says**.
|
||||
|
||||
Kanji: 筆 (writing brush)
|
||||
Components: 竹 (bamboo), 聿 (brush)
|
||||
Mnemonic: A traditional ***writing brush*** is a **brush** made of **bamboo**.
|
||||
|
||||
Kanji: 獄 (prison)
|
||||
Components: 犭 (animal), 言 (say), 犬 (dog)
|
||||
Mnemonic: In ***prison***, like in the **animal** kingdom, only the toughest survive. You have to watch what you **say**. It’s a **dog**‐eat‐dog world.
|
||||
|
||||
Kanji: 新 (new)
|
||||
Components: 立 (standing up), 木 (tree), 斤 (axe)
|
||||
Mnemonic: In order for a ***new*** construction to be made, an empty lot is needed. If there are any **trees** **standing up**, they must be cut down with an **axe**.
|
||||
|
||||
Kanji: 怪 (suspicious)
|
||||
Components: 忄 (weak heart), 圣 (sacred)
|
||||
Mnemonic: That painting of the **Sacred** **Heart** of Jesus looks ***suspicious***. I think it might be a forgery.
|
||||
|
||||
Kanji: 温 (warm (to the touch))
|
||||
Components: 氵 (water drops), 日 (sun), 皿 (dish)
|
||||
Mnemonic: If you leave **water** on a **dish** in the **sun**, it will get ***warm***.
|
||||
|
||||
Kanji: 階 (floor (of a building))
|
||||
Components: 阝 (small village), 皆 (all)
|
||||
Mnemonic: It might be a **small village**, but, despite that, **all** of its buildings have many ***floors***. It’s a village of skyscrapers!
|
||||
|
||||
Kanji: 多 (many)
|
||||
Components: 夕 (evening (before sunset)), 夕 (evening (before sunset))
|
||||
Mnemonic: Two **evenings** in a day would be one too ***many***.
|
||||
|
||||
Kanji: 別 (separate)
|
||||
Components: 口 (mouth), 万 (ten thousand), 刂 (knife)
|
||||
Mnemonic: Tom Six is at it again. For his next flick, he wants to stitch together **ten thousand** people, **mouth**‐to‐anus. One of the most graphic and disturbing scenes will feature one of the victims using a **knife** to ***separate*** perself.
|
||||
|
||||
Kanji: 並 (line up)
|
||||
Components: 䒑 (antlers on a wall), 业 (runway)
|
||||
Mnemonic: In order to land a plane you have to ***line up*** properly with the **runway**. The things that look like **antlers** at the end of the runway are the control towers; you should follow their instructions.
|
||||
|
||||
Kanji: 姿 (figure)
|
||||
Components: 次 (next), 女 (woman)
|
||||
Mnemonic: The **next** **woman** that I date will have a perfect **figure**. Because I’m done with 3D women—it will *literally* be an anime figure!
|
||||
|
||||
Kanji: 実 (real)
|
||||
Components: 宀 (roof with a chimney), 𡗗 (three people)
|
||||
Mnemonic: Living under a **roof with a chimney** with **three people** (a wife and two children)—a happy family life—is not something I could have ever imagined. It does not feel ***real***.
|
||||
|
||||
Kanji: 謝 (apologize)
|
||||
Components: 言 (say), 射 (shoot)
|
||||
Mnemonic: **Shot** first, ***apologize*** (**say** you are sorry) later.
|
||||
|
||||
Kanji: 提 (propose)
|
||||
Components: 扌 (left hand), 是 (go with)
|
||||
Mnemonic:
|
||||
Reference in New Issue
Block a user