foldl
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/convert.py b/convert.py
Lines changed: 74 additions & 3 deletions
diff --git a/docs/models.md b/docs/models.md
Lines changed: 3 additions & 0 deletions
diff --git a/models/chatglm.cpp b/models/chatglm.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/models/chatglm.h b/models/chatglm.h
Lines changed: 6 additions & 6 deletions
@@ -35,6 +35,7 @@ LittleAcademia[<a href="https://github.com/foldl/little-academia"   style="text-
 
 **What's New:**
 
+* 2026-03-03: GLM-OCR
 * 2026-02-22: Youtu-VL
 * 2026-02-18: Youtu-LLM
 * 2026-02-16: Voice Clone with Qwen3-TTS
 
@@ -263,6 +263,7 @@ class ModelType(Enum):
     DotsOCR                 = ModelTypeTagChatImageIn + 0x0000020
     Mistral3                = ModelTypeTagChatImageIn + 0x0000030
     StepVL                  = ModelTypeTagChatImageIn + 0x0000040
+    GLM_OCR                 = ModelTypeTagChatImageIn + 0x0000050
 
     Qwen2Audio              = ModelTypeTagChatAudioIn + 0x0000001
     Qwen3ForcedAligner      = ModelTypeTagChatAudioIn + 0x0000002
@@ -3972,6 +3973,7 @@ def get_weight_names(config):
 
 class GLM4VConverter(BaseConverter):
     MODEL_TYPE = ModelType.GLM4V
+    ASSERT_HEAD_DIM = True
 
     @classmethod
     def state_dict_pp(cls, config, state_dict):
@@ -3987,7 +3989,7 @@ def state_dict_pp(cls, config, state_dict):
                     r[name.replace('gate_up_proj.weight', 'up_proj.weight')]   = part(tensor, 1, 2).contiguous()
                 elif ('.k_proj.' in name) or ('.q_proj.' in name):
                     rope_dim = GLM4VConverter.rope_dim
-                    head_dim = GLM4VConverter.txt_config.hidden_size // GLM4VConverter.txt_config.num_attention_heads
+                    head_dim = GLM4VConverter.txt_config.head_dim
                     r[name] = permute_pair_rope_nope(tensor, tensor.shape[0] // head_dim, rope_dim)
                 else:
                     r[name] = tensor
@@ -4020,11 +4022,16 @@ def state_dict_pp(cls, config, state_dict):
     def dump_config(f, config, ggml_type):
         GLM4VConverter.txt_config = AttributeDict(config.text_config)
         txt_config = GLM4VConverter.txt_config
-        assert txt_config.attention_bias
+
         if isinstance(txt_config.eos_token_id, list):
             txt_config.eos_token_id = txt_config.eos_token_id[0]
 
-        head_dim = txt_config.hidden_size // txt_config.num_attention_heads
+        if 'head_dim' not in txt_config:
+            txt_config.head_dim = txt_config.hidden_size // txt_config.num_attention_heads
+        head_dim = txt_config.head_dim
+
+        if GLM4VConverter.ASSERT_HEAD_DIM:
+            assert head_dim == txt_config.hidden_size // txt_config.num_attention_heads
 
         rope_dim = int(txt_config.rope_parameters["partial_rotary_factor"] * head_dim)
         GLM4VConverter.rope_dim = rope_dim
@@ -4076,6 +4083,68 @@ def get_weight_names(config):
         weights += GLM4VConverter.get_vit_weight_names(config.vision_config['depth'])
         return weights
 
+class GLMOCRConverter(BaseConverter):
+    MODEL_TYPE = ModelType.GLM_OCR
+
+    @classmethod
+    def state_dict_pp(cls, config, state_dict):
+        return GLM4VConverter.state_dict_pp(config, state_dict)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        print("WARNING: MTP not supported!")
+        GLM4VConverter.ASSERT_HEAD_DIM = False
+
+        GLM4VConverter.dump_config(f, config, ggml_type)
+
+        config_values = [
+            GLM4VConverter.txt_config.head_dim
+        ]
+        f.write(struct.pack("<i", *config_values))
+
+    @staticmethod
+    def get_vit_weight_names(num_layer):
+        weight_names = ["visual.downsample.weight",
+                        "visual.downsample.bias",
+                        "visual.merger.gate_proj.weight",
+                        "visual.merger.up_proj.weight",
+                        "visual.merger.down_proj.weight",
+                        "visual.merger.proj.weight",
+                        "visual.merger.post_projection_norm.weight",
+                        "visual.merger.post_projection_norm.bias",
+                        "visual.patch_embed.proj.0.weight",
+                        "visual.patch_embed.proj.bias",
+                        "visual.patch_embed.proj.1.weight",
+                        "visual.post_layernorm.weight"]
+        for i in range(num_layer):
+            weight_names += [
+                    f"visual.layers.{i}.norm1.weight",
+                    f"visual.layers.{i}.norm2.weight",
+                    f"visual.layers.{i}.attn.q_proj.weight",
+                    f"visual.layers.{i}.attn.k_proj.weight",
+                    f"visual.layers.{i}.attn.v_proj.weight",
+                    f"visual.layers.{i}.attn.o_proj.weight",
+                    f"visual.layers.{i}.attn.q_norm.weight",
+                    f"visual.layers.{i}.attn.k_norm.weight",
+                    f"visual.layers.{i}.mlp.gate_proj.weight",
+                    f"visual.layers.{i}.mlp.up_proj.weight",
+                    f"visual.layers.{i}.mlp.down_proj.weight",
+                    f"visual.layers.{i}.attn.q_proj.bias",
+                    f"visual.layers.{i}.attn.k_proj.bias",
+                    f"visual.layers.{i}.attn.v_proj.bias",
+                    f"visual.layers.{i}.attn.o_proj.bias",
+                    f"visual.layers.{i}.mlp.gate_proj.bias",
+                    f"visual.layers.{i}.mlp.up_proj.bias",
+                    f"visual.layers.{i}.mlp.down_proj.bias",
+            ]
+        return weight_names
+
+    @staticmethod
+    def get_weight_names(config):
+        weights  = GLM4Converter.get_weight_names(GLM4VConverter.txt_config)
+        weights += GLMOCRConverter.get_vit_weight_names(config.vision_config['depth'])
+        return weights
+
 class Phi2Converter(BaseConverter):
     MODEL_TYPE = ModelType.Phi2
 
@@ -9694,6 +9763,8 @@ def main():
         DotsOCRConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch.endswith('Glm4vForConditionalGeneration'):
         GLM4VConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch.endswith('GlmOcrForConditionalGeneration'):
+        GLMOCRConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MegrezMoeForCausalLM':
         MegrezMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'OuroForCausalLM':
 
@@ -460,6 +460,9 @@ Please use `--format completion` for these models.
 * Nanonets-OCR2 (`Qwen2VLForConditionalGeneration`, `Qwen2_5_VLForConditionalGeneration`)
     * [x] OCR2: [3B](https://huggingface.co/nanonets/Nanonets-OCR2-3B/tree/d0368059ad151ce9e38f526890cfd4f27b28be65), [1.5B](https://huggingface.co/nanonets/Nanonets-OCR2-1.5B-exp/tree/306a9b2a65672a3dbebd9bce9a9373a9a18674a2)
 
+* GLM-OCR (`GlmOcrForConditionalGeneration`)
+    * [x] [0.7B](https://huggingface.co/zai-org/GLM-OCR/tree/677c6baa60442a451f8a8c7eabdfab32d9801a0b)
+
 ## ASR Models
 
 * GLM-ASR (`GlmAsrForConditionalGeneration`)
 
@@ -680,7 +680,7 @@ namespace chatllm::glm::glm4_0414
         return r;
     }
 
-    ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type)
+    ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim)
         : BaseModelForConditionalGeneration(type, config, runtime_config), config(config)
     {
         const size_t tensor_ovhd = ggml_tensor_overhead();
@@ -689,10 +689,12 @@ namespace chatllm::glm::glm4_0414
         w_ctx_.gctx = GGMLContext({.mem_size = ctx_size, .mem_buffer = nullptr, .no_alloc = true});
         w_ctx_.dtype = config.dtype;
 
+        if (head_dim < 0) head_dim = config.hidden_size / config.num_attention_heads;
+
         transformer = new ModelClass(
                             &w_ctx_, config, false,
                             config.hidden_size, config.num_attention_heads, config.num_key_value_heads,
-                            config.intermediate_size, config.max_length, config.use_attention_bias != 0, false);
+                            config.intermediate_size, head_dim, config.max_length, config.use_attention_bias != 0, false);
 
         for (int i = 0; i < config.num_hidden_layers; i++)
         {
 
@@ -187,8 +187,8 @@ namespace chatllm::glm::glm4_0414
     class GLM4SelfAttention : public RoPESelfAttention<BaseAttention>
     {
     public:
-        GLM4SelfAttention(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int max_length, bool qkv_bias, bool o_bias)
-            : RoPESelfAttention<BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, max_length, qkv_bias, o_bias)
+        GLM4SelfAttention(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int head_dim, int max_length, bool qkv_bias, bool o_bias)
+            : RoPESelfAttention<BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, head_dim, max_length, qkv_bias, o_bias)
         {
         }
     };
@@ -197,20 +197,20 @@ namespace chatllm::glm::glm4_0414
     {
     public:
         GLM4Block(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int intermediate_size,
-                  int max_length, bool qkv_bias, bool o_bias)
-            : LMBlock4(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, max_length, qkv_bias, o_bias)
+                  int head_dim, int max_length, bool qkv_bias, bool o_bias)
+            : LMBlock4(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, head_dim, max_length, qkv_bias, o_bias)
         {
             mlp.set_prec(ggml::prec::GGML_PREC_F32);
         }
     };
 
-    typedef Model<Config, Embedding, RMSNorm, GLM4Block, int, int, int, int, int, bool, bool> ModelClass;
+    typedef Model<Config, Embedding, RMSNorm, GLM4Block, int, int, int, int, int, int, bool, bool> ModelClass;
 
     class ConditionalGeneration : public BaseModelForConditionalGeneration
     {
     public:
         ConditionalGeneration() = default;
-        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_GLM4);
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_GLM4, int head_dim = -1);
 
         void load(ModelLoader &loader) override;
Original file line number	Diff line number	Diff line change
`@@ -680,7 +680,7 @@ namespace chatllm::glm::glm4_0414`
`680`	`680`	`return r;`
`681`	`681`	`}`
`682`	`682`
`683`		`- ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type)`
	`683`	`+ ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim)`
`684`	`684`	`: BaseModelForConditionalGeneration(type, config, runtime_config), config(config)`
`685`	`685`	`{`
`686`	`686`	`const size_t tensor_ovhd = ggml_tensor_overhead();`
`@@ -689,10 +689,12 @@ namespace chatllm::glm::glm4_0414`
`689`	`689`	`w_ctx_.gctx = GGMLContext({.mem_size = ctx_size, .mem_buffer = nullptr, .no_alloc = true});`
`690`	`690`	`w_ctx_.dtype = config.dtype;`
`691`	`691`
	`692`	`+ if (head_dim < 0) head_dim = config.hidden_size / config.num_attention_heads;`
	`693`	`+`
`692`	`694`	`transformer = new ModelClass(`
`693`	`695`	`&w_ctx_, config, false,`
`694`	`696`	`config.hidden_size, config.num_attention_heads, config.num_key_value_heads,`
`695`		`- config.intermediate_size, config.max_length, config.use_attention_bias != 0, false);`
	`697`	`+ config.intermediate_size, head_dim, config.max_length, config.use_attention_bias != 0, false);`
`696`	`698`
`697`	`699`	`for (int i = 0; i < config.num_hidden_layers; i++)`
`698`	`700`	`{`
Original file line number	Diff line number	Diff line change
`@@ -187,8 +187,8 @@ namespace chatllm::glm::glm4_0414`
`187`	`187`	`class GLM4SelfAttention : public RoPESelfAttention<BaseAttention>`
`188`	`188`	`{`
`189`	`189`	`public:`
`190`		`- GLM4SelfAttention(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int max_length, bool qkv_bias, bool o_bias)`
`191`		`- : RoPESelfAttention<BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, max_length, qkv_bias, o_bias)`
	`190`	`+ GLM4SelfAttention(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int head_dim, int max_length, bool qkv_bias, bool o_bias)`
	`191`	`+ : RoPESelfAttention<BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, head_dim, max_length, qkv_bias, o_bias)`
`192`	`192`	`{`
`193`	`193`	`}`
`194`	`194`	`};`
`@@ -197,20 +197,20 @@ namespace chatllm::glm::glm4_0414`
`197`	`197`	`{`
`198`	`198`	`public:`
`199`	`199`	`GLM4Block(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int intermediate_size,`
`200`		`- int max_length, bool qkv_bias, bool o_bias)`
`201`		`- : LMBlock4(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, max_length, qkv_bias, o_bias)`
	`200`	`+ int head_dim, int max_length, bool qkv_bias, bool o_bias)`
	`201`	`+ : LMBlock4(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, head_dim, max_length, qkv_bias, o_bias)`
`202`	`202`	`{`
`203`	`203`	`mlp.set_prec(ggml::prec::GGML_PREC_F32);`
`204`	`204`	`}`
`205`	`205`	`};`
`206`	`206`
`207`		`- typedef Model<Config, Embedding, RMSNorm, GLM4Block, int, int, int, int, int, bool, bool> ModelClass;`
	`207`	`+ typedef Model<Config, Embedding, RMSNorm, GLM4Block, int, int, int, int, int, int, bool, bool> ModelClass;`
`208`	`208`
`209`	`209`	`class ConditionalGeneration : public BaseModelForConditionalGeneration`
`210`	`210`	`{`
`211`	`211`	`public:`
`212`	`212`	`ConditionalGeneration() = default;`
`213`		`- ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_GLM4);`
	`213`	`+ ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_GLM4, int head_dim = -1);`
`214`	`214`
`215`	`215`	`void load(ModelLoader &loader) override;`
`216`	`216`