11#include " qwen.h"
22#include < optional>
3+ #include < string.h>
34#include " ../src/audio_process.h"
45#include " ../src/vision_process.h"
56#include " qwen_asr.h"
@@ -900,6 +901,11 @@ namespace chatllm::qwen::ds_r1_distill
900901
901902namespace chatllm ::qwen::vit
902903{
// Default constructor of Config (namespace chatllm::qwen::vit).
// Overwrites the whole object with zero bytes, so every field starts out
// as all-zero before any configuration values are assigned.
// NOTE(review): a raw memset over `this` is only well-defined when Config is
// trivially copyable (no virtual functions, no members such as std::string or
// std::vector). The declaration is not visible here -- confirm against the header.
904+ Config::Config ()
905+ {
906+ memset (this , 0 , sizeof (Config));
907+ }
908+
903909 PatchEmbedding::PatchEmbedding (InitContext *ctx, const Config &config)
904910 : proj0(ctx, 3 , config.hidden_size, config.patch_size, config.patch_size, 0 , 1 , 1 , false ),
905911 proj1 (ctx, 3 , config.hidden_size, config.patch_size, config.patch_size, 0 , 1 , 1 , false )
@@ -1366,9 +1372,9 @@ namespace chatllm::qwen::vit
13661372 :
13671373 GRAPH_SIZE(GRAPH_SIZE), _ctx(&backend_context),
13681374 n_threads(runtime_config.n_threads),
1375+ vis_config(),
13691376 max_patches(max_patches)
13701377 {
1371- memset (&vis_config, 0 , sizeof (vis_config));
13721378 _ctx.cache_dtype = runtime_config.cache_type ;
13731379 model_gpu_layers = BackendContext::get_ngl_of_model (runtime_config.model_gpu_layers , " vis" );
13741380 }
@@ -1391,8 +1397,10 @@ namespace chatllm::qwen::vit
13911397 const auto vis_cfg = config[" config.json" ][" vision_config" ];
13921398 if (!vis_cfg.IsObject ()) return false ;
13931399
1400+ int full_cnt = 0 ;
1401+
13941402 vis_config.dtype = dtype;
1395- vis_config.is_ver_2_0 = vis_cfg [" model_type" ].ToString () == " qwen2_vl" ;
1403+ vis_config.is_ver_2_0 = config[ " config.json " ] [" model_type" ].ToString () == " qwen2_vl" ;
13961404
13971405 vis_config.patch_size = (int )vis_cfg[" patch_size" ].ToInt ();
13981406 vis_config.num_attention_heads = (int )vis_cfg[" num_heads" ].ToInt ();
@@ -1420,7 +1428,10 @@ namespace chatllm::qwen::vit
14201428 auto indices = vis_cfg[" fullatt_block_indexes" ];
14211429 CHATLLM_CHECK ((int )indices.length () <= VIT_MAX_LAYERS);
14221430 for (int i = 0 ; i < (int )indices.length (); i++)
1431+ {
1432+ full_cnt += 1 ;
14231433 vis_config.fullatt_block_indices [indices[i].ToInt ()] = true ;
1434+ }
14241435 }
14251436
14261437 auto pp_cfg = config[" preprocessor_config.json" ];
@@ -1446,7 +1457,7 @@ namespace chatllm::qwen::vit
14461457 }
14471458
14481459 const size_t tensor_ovhd = ggml_tensor_overhead ();
1449- const size_t num_tensors = vis_config.is_ver_2_0 ? 10 + vis_config.num_hidden_layers * 18 : 5 + vis_config.num_hidden_layers * 18 ;
1460+ const size_t num_tensors = vis_config.is_ver_2_0 ? 10 + vis_config.num_hidden_layers * 18 : ( 9 + vis_config.num_hidden_layers * 18 - full_cnt) ;
14501461 const size_t ctx_size = num_tensors * tensor_ovhd;
14511462 _ctx.gctx = GGMLContext ({.mem_size = ctx_size, .mem_buffer = nullptr , .no_alloc = true });
14521463 _ctx.dtype = dtype;
0 commit comments