temporary refactor llama_vision_graph_builder

ngxson · Jan 22, 2025 · 9716c7b
1 parent 32daa38
commit 9716c7b
Show file tree

Hide file tree

Showing 3 changed files with 174 additions and 120 deletions.
diff --git a/examples/vision/vision.cpp b/examples/vision/vision.cpp
@@ -50,7 +50,7 @@ static llama_vision_bitmap * load_image_from_file(const char * fname) {
 }
 
 // split string by a `std::string delim` instead of `char delim`
-static std::vector<std::string> string_split(std::string s, const std::string & delimiter) {
+static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
     std::vector<std::string> tokens;
     size_t pos = 0;
     std::string token;
@@ -76,7 +76,7 @@ static std::vector<tokenized_part> tokenize_with_img_placement(
         const std::string & text,
         bool   add_special,
         bool   parse_special) {
-    std::vector<std::string> parts = string_split(text, IMG_PLACEMENT);
+    std::vector<std::string> parts = string_split_str(text, IMG_PLACEMENT);
     std::vector<tokenized_part> output;
     for (const auto & part : parts) {
         //printf("tokenizing part: %s\n", part.c_str());
@@ -114,6 +114,10 @@ int main(int argc, char ** argv) {
     llama_context * ctx = llama_init.context.get();
     const llama_model * model = llama_init.model.get();
     const llama_vocab * vocab = llama_model_get_vocab(model);
+    if (!model) {
+        LOG_ERR("failed to load model\n");
+        return 1;
+    }
 
     struct common_sampler * smpl = common_sampler_init(model, params.sampling);
 

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -4056,6 +4056,11 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
         case LLM_ARCH_QWEN2VL:
             return LLAMA_ROPE_TYPE_MROPE;
 
+        case LLM_ARCH_VISION_LLAVA:
+        case LLM_ARCH_VISION_MOBILEVLM:
+        case LLM_ARCH_VISION_MINICPMV:
+            GGML_ABORT("vision arch does not use RoPE");
+
         // all model arches should be listed explicitly here
         case LLM_ARCH_UNKNOWN:
             GGML_ABORT("unknown architecture");