diff --git "a/logs/quantize-Qwen3-Coder-Next-Q8_0.log" "b/logs/quantize-Qwen3-Coder-Next-Q8_0.log"
new file mode 100644
--- /dev/null
+++ "b/logs/quantize-Qwen3-Coder-Next-Q8_0.log"
@@ -0,0 +1,910 @@
+numactl -N ${SOCKET} -m ${SOCKET} \
+./build/bin/llama-quantize \
+    --pure \
+    /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf \
+    /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-Q8_0.gguf \
+    Q8_0 \
+    128
+
+main: build = 4211 (b2cb4512)
+main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
+main: quantizing '/mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf' to '/mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-Q8_0.gguf' as Q8_0 using 128 threads
+llama_model_loader: additional 3 GGUFs metadata loaded.
+llama_model_loader: loaded meta data with 46 key-value pairs and 843 tensors from /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = qwen3next
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                     general.sampling.top_k i32              = 40
+llama_model_loader: - kv   3:                     general.sampling.top_p f32              = 0.950000
+llama_model_loader: - kv   4:                      general.sampling.temp f32              = 1.000000
+llama_model_loader: - kv   5:                               general.name str              = Qwen3 Coder Next
+llama_model_loader: - kv   6:                         general.size_label str              = 512x2.5B
+llama_model_loader: - kv   7:                            general.license str              = apache-2.0
+llama_model_loader: - kv   8:                       general.license.link str              = https://huggingface.co/Qwen/Qwen3-Cod...
+llama_model_loader: - kv   9:                               general.tags arr[str,1]       = ["text-generation"]
+llama_model_loader: - kv  10:                      qwen3next.block_count u32              = 48
+llama_model_loader: - kv  11:                   qwen3next.context_length u32              = 262144
+llama_model_loader: - kv  12:                 qwen3next.embedding_length u32              = 2048
+llama_model_loader: - kv  13:              qwen3next.feed_forward_length u32              = 5120
+llama_model_loader: - kv  14:             qwen3next.attention.head_count u32              = 16
+llama_model_loader: - kv  15:          qwen3next.attention.head_count_kv u32              = 2
+llama_model_loader: - kv  16:                   qwen3next.rope.freq_base f32              = 5000000.000000
+llama_model_loader: - kv  17: qwen3next.attention.layer_norm_rms_epsilon f32              = 0.000001
+llama_model_loader: - kv  18:                     qwen3next.expert_count u32              = 512
+llama_model_loader: - kv  19:                qwen3next.expert_used_count u32              = 10
+llama_model_loader: - kv  20:             qwen3next.attention.key_length u32              = 256
+llama_model_loader: - kv  21:           qwen3next.attention.value_length u32              = 256
+llama_model_loader: - kv  22:                          general.file_type u32              = 32
+llama_model_loader: - kv  23:       qwen3next.expert_feed_forward_length u32              = 512
+llama_model_loader: - kv  24: qwen3next.expert_shared_feed_forward_length u32              = 512
+llama_model_loader: - kv  25:                  qwen3next.ssm.conv_kernel u32              = 4
+llama_model_loader: - kv  26:                   qwen3next.ssm.state_size u32              = 128
+llama_model_loader: - kv  27:                  qwen3next.ssm.group_count u32              = 16
+llama_model_loader: - kv  28:               qwen3next.ssm.time_step_rank u32              = 32
+llama_model_loader: - kv  29:                   qwen3next.ssm.inner_size u32              = 4096
+llama_model_loader: - kv  30:          qwen3next.full_attention_interval u32              = 4
+llama_model_loader: - kv  31:             qwen3next.rope.dimension_count u32              = 64
+llama_model_loader: - kv  32:               general.quantization_version u32              = 2
+llama_model_loader: - kv  33:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  34:                         tokenizer.ggml.pre str              = qwen2
+llama_model_loader: - kv  35:                      tokenizer.ggml.tokens arr[str,151936]  = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv  36:                  tokenizer.ggml.token_type arr[i32,151936]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv  37:                      tokenizer.ggml.merges arr[str,151387]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv  38:                tokenizer.ggml.eos_token_id u32              = 151645
+llama_model_loader: - kv  39:            tokenizer.ggml.padding_token_id u32              = 151643
+llama_model_loader: - kv  40:                tokenizer.ggml.bos_token_id u32              = 151643
+llama_model_loader: - kv  41:               tokenizer.ggml.add_bos_token bool             = false
+llama_model_loader: - kv  42:                    tokenizer.chat_template str              = {% macro render_extra_keys(json_dict,...
+llama_model_loader: - kv  43:                                   split.no u16              = 0
+llama_model_loader: - kv  44:                                split.count u16              = 4
+llama_model_loader: - kv  45:                        split.tensors.count i32              = 843
+llama_model_loader: - type  f32:  361 tensors
+llama_model_loader: - type bf16:  482 tensors
+[   1/ 843]                    token_embd.weight - [ 2048, 151936,     1,     1], type =   bf16, converting to q8_0 .. size =   593.50 MiB ->   315.30 MiB
+[   2/ 843]               blk.0.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[   3/ 843]                          blk.0.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[   4/ 843]              blk.0.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[   5/ 843]                    blk.0.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[   6/ 843]                  blk.0.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[   7/ 843]                blk.0.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[   8/ 843]               blk.0.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[   9/ 843]                blk.0.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[  10/ 843]                 blk.0.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  11/ 843]           blk.0.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  12/ 843]           blk.0.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  13/ 843]             blk.0.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  14/ 843]            blk.0.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[  15/ 843]          blk.0.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  16/ 843]          blk.0.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  17/ 843]            blk.0.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  18/ 843]      blk.0.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  19/ 843]     blk.0.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  20/ 843]               blk.1.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  21/ 843]                          blk.1.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  22/ 843]              blk.1.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[  23/ 843]                    blk.1.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  24/ 843]                  blk.1.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[  25/ 843]                blk.1.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[  26/ 843]               blk.1.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  27/ 843]                blk.1.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[  28/ 843]                 blk.1.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  29/ 843]            blk.1.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[  30/ 843]          blk.1.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  31/ 843]          blk.1.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  32/ 843]            blk.1.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  33/ 843]      blk.1.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  34/ 843]     blk.1.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  35/ 843]           blk.1.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  36/ 843]           blk.1.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  37/ 843]             blk.1.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  38/ 843]               blk.2.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  39/ 843]                          blk.2.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  40/ 843]              blk.2.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[  41/ 843]                    blk.2.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  42/ 843]                  blk.2.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[  43/ 843]                blk.2.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[  44/ 843]               blk.2.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  45/ 843]                blk.2.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[  46/ 843]                 blk.2.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  47/ 843]            blk.2.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[  48/ 843]          blk.2.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  49/ 843]          blk.2.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  50/ 843]            blk.2.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  51/ 843]      blk.2.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  52/ 843]     blk.2.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  53/ 843]           blk.2.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  54/ 843]           blk.2.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  55/ 843]             blk.2.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  56/ 843]               blk.3.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  57/ 843]            blk.3.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[  58/ 843]          blk.3.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  59/ 843]          blk.3.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  60/ 843]            blk.3.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  61/ 843]      blk.3.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  62/ 843]     blk.3.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  63/ 843]             blk.3.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[  64/ 843]                  blk.3.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  65/ 843]             blk.3.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  66/ 843]             blk.3.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[  67/ 843]                  blk.3.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[  68/ 843]                  blk.3.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  69/ 843]           blk.3.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  70/ 843]           blk.3.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  71/ 843]             blk.3.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  72/ 843]               blk.4.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  73/ 843]                          blk.4.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  74/ 843]              blk.4.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[  75/ 843]                    blk.4.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  76/ 843]                  blk.4.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[  77/ 843]                blk.4.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[  78/ 843]               blk.4.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  79/ 843]                blk.4.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[  80/ 843]                 blk.4.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  81/ 843]            blk.4.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[  82/ 843]          blk.4.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  83/ 843]          blk.4.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  84/ 843]            blk.4.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[  85/ 843]      blk.4.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  86/ 843]     blk.4.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  87/ 843]           blk.4.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  88/ 843]           blk.4.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  89/ 843]             blk.4.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[  90/ 843]               blk.5.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[  91/ 843]                          blk.5.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  92/ 843]              blk.5.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[  93/ 843]                    blk.5.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[  94/ 843]                  blk.5.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[  95/ 843]                blk.5.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[  96/ 843]               blk.5.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  97/ 843]                blk.5.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[  98/ 843]                 blk.5.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[  99/ 843]            blk.5.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 100/ 843]          blk.5.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 101/ 843]          blk.5.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 102/ 843]            blk.5.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 103/ 843]      blk.5.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 104/ 843]     blk.5.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 105/ 843]           blk.5.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 106/ 843]           blk.5.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 107/ 843]             blk.5.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 108/ 843]               blk.6.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 109/ 843]                          blk.6.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 110/ 843]              blk.6.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 111/ 843]                    blk.6.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 112/ 843]                  blk.6.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 113/ 843]                blk.6.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 114/ 843]               blk.6.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 115/ 843]                blk.6.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 116/ 843]                 blk.6.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 117/ 843]           blk.6.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 118/ 843]           blk.6.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 119/ 843]             blk.6.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 120/ 843]            blk.6.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 121/ 843]          blk.6.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 122/ 843]          blk.6.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 123/ 843]            blk.6.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 124/ 843]      blk.6.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 125/ 843]     blk.6.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 126/ 843]               blk.7.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 127/ 843]            blk.7.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 128/ 843]          blk.7.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 129/ 843]          blk.7.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 130/ 843]            blk.7.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 131/ 843]      blk.7.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 132/ 843]     blk.7.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 133/ 843]             blk.7.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 134/ 843]                  blk.7.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 135/ 843]             blk.7.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 136/ 843]             blk.7.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 137/ 843]                  blk.7.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 138/ 843]                  blk.7.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 139/ 843]           blk.7.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 140/ 843]           blk.7.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 141/ 843]             blk.7.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 142/ 843]               blk.8.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 143/ 843]                          blk.8.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 144/ 843]              blk.8.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 145/ 843]                    blk.8.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 146/ 843]                  blk.8.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 147/ 843]                blk.8.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 148/ 843]               blk.8.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 149/ 843]                blk.8.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 150/ 843]                 blk.8.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 151/ 843]            blk.8.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 152/ 843]          blk.8.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 153/ 843]          blk.8.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 154/ 843]            blk.8.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 155/ 843]      blk.8.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 156/ 843]     blk.8.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 157/ 843]           blk.8.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 158/ 843]           blk.8.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 159/ 843]             blk.8.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 160/ 843]               blk.9.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 161/ 843]                          blk.9.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 162/ 843]              blk.9.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 163/ 843]                    blk.9.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 164/ 843]                  blk.9.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 165/ 843]                blk.9.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 166/ 843]               blk.9.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 167/ 843]                blk.9.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 168/ 843]                 blk.9.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 169/ 843]            blk.9.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 170/ 843]          blk.9.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 171/ 843]          blk.9.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 172/ 843]            blk.9.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 173/ 843]      blk.9.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 174/ 843]     blk.9.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 175/ 843]              blk.10.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 176/ 843]                         blk.10.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 177/ 843]             blk.10.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 178/ 843]                   blk.10.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 179/ 843]                 blk.10.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 180/ 843]               blk.10.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 181/ 843]              blk.10.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 182/ 843]               blk.10.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 183/ 843]                blk.10.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 184/ 843]           blk.10.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 185/ 843]         blk.10.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 186/ 843]         blk.10.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 187/ 843]           blk.10.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 188/ 843]     blk.10.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 189/ 843]    blk.10.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 190/ 843]           blk.9.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 191/ 843]           blk.9.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 192/ 843]             blk.9.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 193/ 843]          blk.10.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 194/ 843]          blk.10.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 195/ 843]            blk.10.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 196/ 843]              blk.11.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 197/ 843]           blk.11.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 198/ 843]         blk.11.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 199/ 843]         blk.11.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 200/ 843]           blk.11.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 201/ 843]     blk.11.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 202/ 843]    blk.11.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 203/ 843]            blk.11.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 204/ 843]                 blk.11.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 205/ 843]            blk.11.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 206/ 843]            blk.11.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 207/ 843]                 blk.11.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 208/ 843]                 blk.11.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 209/ 843]          blk.11.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 210/ 843]          blk.11.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 211/ 843]            blk.11.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 212/ 843]              blk.12.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 213/ 843]                         blk.12.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 214/ 843]             blk.12.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 215/ 843]                   blk.12.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 216/ 843]                 blk.12.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 217/ 843]               blk.12.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 218/ 843]              blk.12.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 219/ 843]               blk.12.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 220/ 843]                blk.12.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 221/ 843]          blk.12.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 222/ 843]          blk.12.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 223/ 843]            blk.12.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 224/ 843]           blk.12.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 225/ 843]         blk.12.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 226/ 843]         blk.12.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 227/ 843]           blk.12.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 228/ 843]     blk.12.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 229/ 843]    blk.12.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 230/ 843]              blk.13.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 231/ 843]                         blk.13.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 232/ 843]             blk.13.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 233/ 843]                   blk.13.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 234/ 843]                 blk.13.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 235/ 843]               blk.13.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 236/ 843]              blk.13.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 237/ 843]               blk.13.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 238/ 843]                blk.13.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 239/ 843]           blk.13.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 240/ 843]         blk.13.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 241/ 843]         blk.13.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 242/ 843]           blk.13.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 243/ 843]     blk.13.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 244/ 843]    blk.13.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 245/ 843]          blk.13.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 246/ 843]          blk.13.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 247/ 843]            blk.13.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 248/ 843]              blk.14.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 249/ 843]                         blk.14.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 250/ 843]             blk.14.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 251/ 843]                   blk.14.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 252/ 843]                 blk.14.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 253/ 843]               blk.14.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 254/ 843]              blk.14.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 255/ 843]               blk.14.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 256/ 843]                blk.14.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 257/ 843]           blk.14.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 258/ 843]         blk.14.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 259/ 843]         blk.14.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 260/ 843]           blk.14.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 261/ 843]     blk.14.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 262/ 843]    blk.14.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 263/ 843]          blk.14.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 264/ 843]          blk.14.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 265/ 843]            blk.14.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 266/ 843]              blk.15.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 267/ 843]           blk.15.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 268/ 843]         blk.15.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 269/ 843]         blk.15.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 270/ 843]           blk.15.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 271/ 843]     blk.15.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 272/ 843]    blk.15.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 273/ 843]            blk.15.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 274/ 843]                 blk.15.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 275/ 843]            blk.15.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 276/ 843]            blk.15.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 277/ 843]                 blk.15.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 278/ 843]                 blk.15.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 279/ 843]          blk.15.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 280/ 843]          blk.15.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 281/ 843]            blk.15.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 282/ 843]              blk.16.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 283/ 843]                         blk.16.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 284/ 843]             blk.16.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 285/ 843]                   blk.16.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 286/ 843]                 blk.16.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 287/ 843]               blk.16.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 288/ 843]              blk.16.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 289/ 843]               blk.16.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 290/ 843]                blk.16.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 291/ 843]           blk.16.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 292/ 843]         blk.16.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 293/ 843]         blk.16.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 294/ 843]           blk.16.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 295/ 843]     blk.16.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 296/ 843]    blk.16.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 297/ 843]          blk.16.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 298/ 843]          blk.16.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 299/ 843]            blk.16.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 300/ 843]              blk.17.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 301/ 843]                         blk.17.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 302/ 843]             blk.17.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 303/ 843]                   blk.17.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 304/ 843]                 blk.17.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 305/ 843]               blk.17.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 306/ 843]              blk.17.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 307/ 843]               blk.17.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 308/ 843]                blk.17.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 309/ 843]          blk.17.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 310/ 843]          blk.17.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 311/ 843]            blk.17.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 312/ 843]           blk.17.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 313/ 843]         blk.17.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 314/ 843]         blk.17.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 315/ 843]           blk.17.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 316/ 843]     blk.17.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 317/ 843]    blk.17.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 318/ 843]              blk.18.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 319/ 843]                         blk.18.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 320/ 843]             blk.18.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 321/ 843]                   blk.18.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 322/ 843]                 blk.18.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 323/ 843]               blk.18.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 324/ 843]              blk.18.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 325/ 843]               blk.18.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 326/ 843]                blk.18.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 327/ 843]           blk.18.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 328/ 843]         blk.18.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 329/ 843]         blk.18.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 330/ 843]           blk.18.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 331/ 843]     blk.18.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 332/ 843]    blk.18.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 333/ 843]          blk.18.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 334/ 843]          blk.18.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 335/ 843]            blk.18.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 336/ 843]              blk.19.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 337/ 843]           blk.19.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 338/ 843]         blk.19.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 339/ 843]         blk.19.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 340/ 843]           blk.19.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 341/ 843]     blk.19.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 342/ 843]    blk.19.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 343/ 843]            blk.19.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 344/ 843]                 blk.19.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 345/ 843]            blk.19.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 346/ 843]            blk.19.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 347/ 843]                 blk.19.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 348/ 843]                 blk.19.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 349/ 843]          blk.19.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 350/ 843]          blk.19.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 351/ 843]            blk.19.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 352/ 843]              blk.20.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 353/ 843]                         blk.20.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 354/ 843]             blk.20.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 355/ 843]                   blk.20.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 356/ 843]                 blk.20.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 357/ 843]               blk.20.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 358/ 843]              blk.20.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 359/ 843]               blk.20.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 360/ 843]                blk.20.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 361/ 843]           blk.20.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 362/ 843]         blk.20.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 363/ 843]         blk.20.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 364/ 843]           blk.20.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 365/ 843]     blk.20.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 366/ 843]    blk.20.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 367/ 843]          blk.20.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 368/ 843]          blk.20.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 369/ 843]            blk.20.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 370/ 843]              blk.21.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 371/ 843]                         blk.21.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 372/ 843]             blk.21.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 373/ 843]                   blk.21.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 374/ 843]                 blk.21.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 375/ 843]               blk.21.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 376/ 843]              blk.21.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 377/ 843]               blk.21.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 378/ 843]                blk.21.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 379/ 843]           blk.21.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 380/ 843]         blk.21.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 381/ 843]         blk.21.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 382/ 843]           blk.21.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 383/ 843]     blk.21.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 384/ 843]    blk.21.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 385/ 843]          blk.21.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 386/ 843]          blk.21.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 387/ 843]            blk.21.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 388/ 843]              blk.22.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 389/ 843]                         blk.22.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 390/ 843]             blk.22.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 391/ 843]                   blk.22.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 392/ 843]                 blk.22.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 393/ 843]               blk.22.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 394/ 843]              blk.22.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 395/ 843]               blk.22.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 396/ 843]                blk.22.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 397/ 843]           blk.22.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 398/ 843]         blk.22.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 399/ 843]         blk.22.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 400/ 843]           blk.22.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 401/ 843]     blk.22.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 402/ 843]    blk.22.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 403/ 843]          blk.22.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 404/ 843]          blk.22.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 405/ 843]            blk.22.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 406/ 843]              blk.23.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 407/ 843]          blk.23.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 408/ 843]          blk.23.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 409/ 843]            blk.23.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 410/ 843]           blk.23.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 411/ 843]         blk.23.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 412/ 843]         blk.23.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 413/ 843]           blk.23.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 414/ 843]     blk.23.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 415/ 843]    blk.23.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 416/ 843]            blk.23.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 417/ 843]                 blk.23.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 418/ 843]            blk.23.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 419/ 843]            blk.23.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 420/ 843]                 blk.23.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 421/ 843]                 blk.23.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 422/ 843]              blk.24.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 423/ 843]                         blk.24.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 424/ 843]             blk.24.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 425/ 843]                   blk.24.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 426/ 843]                 blk.24.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 427/ 843]               blk.24.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 428/ 843]              blk.24.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 429/ 843]               blk.24.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 430/ 843]                blk.24.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 431/ 843]           blk.24.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 432/ 843]         blk.24.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 433/ 843]         blk.24.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 434/ 843]           blk.24.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 435/ 843]     blk.24.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 436/ 843]    blk.24.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 437/ 843]          blk.24.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 438/ 843]          blk.24.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 439/ 843]            blk.24.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 440/ 843]              blk.25.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 441/ 843]                         blk.25.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 442/ 843]             blk.25.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 443/ 843]                   blk.25.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 444/ 843]                 blk.25.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 445/ 843]               blk.25.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 446/ 843]              blk.25.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 447/ 843]               blk.25.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 448/ 843]                blk.25.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 449/ 843]           blk.25.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 450/ 843]         blk.25.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 451/ 843]         blk.25.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 452/ 843]           blk.25.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 453/ 843]     blk.25.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 454/ 843]    blk.25.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 455/ 843]          blk.25.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 456/ 843]          blk.25.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 457/ 843]            blk.25.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 458/ 843]              blk.26.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 459/ 843]                         blk.26.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 460/ 843]             blk.26.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 461/ 843]                   blk.26.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 462/ 843]                 blk.26.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 463/ 843]               blk.26.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 464/ 843]              blk.26.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 465/ 843]               blk.26.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 466/ 843]                blk.26.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 467/ 843]           blk.26.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 468/ 843]         blk.26.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 469/ 843]         blk.26.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 470/ 843]           blk.26.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 471/ 843]     blk.26.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 472/ 843]    blk.26.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 473/ 843]          blk.26.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 474/ 843]          blk.26.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 475/ 843]            blk.26.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 476/ 843]              blk.27.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 477/ 843]           blk.27.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 478/ 843]         blk.27.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 479/ 843]         blk.27.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 480/ 843]           blk.27.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 481/ 843]     blk.27.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 482/ 843]    blk.27.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 483/ 843]            blk.27.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 484/ 843]                 blk.27.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 485/ 843]            blk.27.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 486/ 843]            blk.27.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 487/ 843]                 blk.27.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 488/ 843]                 blk.27.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 489/ 843]          blk.27.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 490/ 843]          blk.27.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 491/ 843]            blk.27.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 492/ 843]              blk.28.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 493/ 843]                         blk.28.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 494/ 843]             blk.28.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 495/ 843]                   blk.28.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 496/ 843]                 blk.28.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 497/ 843]               blk.28.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 498/ 843]              blk.28.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 499/ 843]               blk.28.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 500/ 843]                blk.28.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 501/ 843]           blk.28.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 502/ 843]         blk.28.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 503/ 843]         blk.28.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 504/ 843]           blk.28.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 505/ 843]     blk.28.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 506/ 843]    blk.28.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 507/ 843]          blk.28.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 508/ 843]          blk.28.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 509/ 843]            blk.28.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 510/ 843]              blk.29.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 511/ 843]                         blk.29.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 512/ 843]             blk.29.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 513/ 843]                   blk.29.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 514/ 843]                 blk.29.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 515/ 843]               blk.29.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 516/ 843]              blk.29.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 517/ 843]               blk.29.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 518/ 843]                blk.29.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 519/ 843]          blk.29.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 520/ 843]          blk.29.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 521/ 843]            blk.29.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 522/ 843]           blk.29.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 523/ 843]         blk.29.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 524/ 843]         blk.29.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 525/ 843]           blk.29.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 526/ 843]     blk.29.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 527/ 843]    blk.29.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 528/ 843]              blk.30.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 529/ 843]                         blk.30.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 530/ 843]             blk.30.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 531/ 843]                   blk.30.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 532/ 843]                 blk.30.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 533/ 843]               blk.30.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 534/ 843]              blk.30.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 535/ 843]               blk.30.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 536/ 843]                blk.30.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 537/ 843]           blk.30.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 538/ 843]         blk.30.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 539/ 843]         blk.30.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 540/ 843]           blk.30.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 541/ 843]     blk.30.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 542/ 843]    blk.30.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 543/ 843]          blk.30.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 544/ 843]          blk.30.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 545/ 843]            blk.30.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 546/ 843]              blk.31.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 547/ 843]           blk.31.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 548/ 843]         blk.31.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 549/ 843]         blk.31.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 550/ 843]           blk.31.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 551/ 843]     blk.31.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 552/ 843]    blk.31.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 553/ 843]            blk.31.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 554/ 843]                 blk.31.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 555/ 843]            blk.31.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 556/ 843]            blk.31.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 557/ 843]                 blk.31.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 558/ 843]                 blk.31.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 559/ 843]          blk.31.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 560/ 843]          blk.31.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 561/ 843]            blk.31.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 562/ 843]              blk.32.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 563/ 843]                         blk.32.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 564/ 843]             blk.32.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 565/ 843]                   blk.32.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 566/ 843]                 blk.32.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 567/ 843]               blk.32.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 568/ 843]              blk.32.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 569/ 843]               blk.32.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 570/ 843]                blk.32.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 571/ 843]           blk.32.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 572/ 843]         blk.32.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 573/ 843]         blk.32.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 574/ 843]           blk.32.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 575/ 843]     blk.32.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 576/ 843]    blk.32.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 577/ 843]          blk.32.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 578/ 843]          blk.32.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 579/ 843]            blk.32.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 580/ 843]              blk.33.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 581/ 843]                         blk.33.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 582/ 843]             blk.33.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 583/ 843]                   blk.33.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 584/ 843]                 blk.33.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 585/ 843]               blk.33.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 586/ 843]              blk.33.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 587/ 843]               blk.33.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 588/ 843]                blk.33.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 589/ 843]           blk.33.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 590/ 843]         blk.33.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 591/ 843]         blk.33.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 592/ 843]           blk.33.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 593/ 843]     blk.33.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 594/ 843]    blk.33.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 595/ 843]          blk.33.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 596/ 843]          blk.33.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 597/ 843]            blk.33.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 598/ 843]              blk.34.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 599/ 843]                         blk.34.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 600/ 843]             blk.34.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 601/ 843]                   blk.34.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 602/ 843]                 blk.34.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 603/ 843]               blk.34.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 604/ 843]              blk.34.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 605/ 843]               blk.34.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 606/ 843]                blk.34.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 607/ 843]          blk.34.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 608/ 843]          blk.34.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 609/ 843]            blk.34.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 610/ 843]           blk.34.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 611/ 843]         blk.34.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 612/ 843]         blk.34.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 613/ 843]           blk.34.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 614/ 843]     blk.34.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 615/ 843]    blk.34.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 616/ 843]              blk.35.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 617/ 843]           blk.35.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 618/ 843]         blk.35.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 619/ 843]         blk.35.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 620/ 843]           blk.35.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 621/ 843]     blk.35.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 622/ 843]    blk.35.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 623/ 843]            blk.35.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 624/ 843]                 blk.35.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 625/ 843]            blk.35.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 626/ 843]            blk.35.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 627/ 843]                 blk.35.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 628/ 843]                 blk.35.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 629/ 843]          blk.35.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 630/ 843]          blk.35.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 631/ 843]            blk.35.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 632/ 843]              blk.36.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 633/ 843]                         blk.36.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 634/ 843]             blk.36.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 635/ 843]                   blk.36.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 636/ 843]                 blk.36.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 637/ 843]               blk.36.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 638/ 843]              blk.36.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 639/ 843]               blk.36.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 640/ 843]                blk.36.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 641/ 843]           blk.36.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 642/ 843]         blk.36.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 643/ 843]         blk.36.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 644/ 843]           blk.36.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 645/ 843]     blk.36.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 646/ 843]    blk.36.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 647/ 843]          blk.36.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 648/ 843]          blk.36.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 649/ 843]            blk.36.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 650/ 843]              blk.37.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 651/ 843]                         blk.37.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 652/ 843]             blk.37.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 653/ 843]                   blk.37.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 654/ 843]                 blk.37.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 655/ 843]               blk.37.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 656/ 843]              blk.37.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 657/ 843]               blk.37.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 658/ 843]                blk.37.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 659/ 843]           blk.37.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 660/ 843]         blk.37.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 661/ 843]         blk.37.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 662/ 843]           blk.37.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 663/ 843]     blk.37.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 664/ 843]    blk.37.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 665/ 843]          blk.37.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 666/ 843]          blk.37.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 667/ 843]            blk.37.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 668/ 843]              blk.38.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 669/ 843]                         blk.38.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 670/ 843]             blk.38.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 671/ 843]                   blk.38.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 672/ 843]                 blk.38.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 673/ 843]               blk.38.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 674/ 843]              blk.38.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 675/ 843]               blk.38.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 676/ 843]                blk.38.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 677/ 843]           blk.38.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 678/ 843]         blk.38.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 679/ 843]         blk.38.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 680/ 843]           blk.38.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 681/ 843]     blk.38.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 682/ 843]    blk.38.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 683/ 843]          blk.38.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 684/ 843]          blk.38.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 685/ 843]            blk.38.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 686/ 843]              blk.39.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 687/ 843]           blk.39.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 688/ 843]         blk.39.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 689/ 843]         blk.39.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 690/ 843]           blk.39.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 691/ 843]     blk.39.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 692/ 843]    blk.39.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 693/ 843]            blk.39.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 694/ 843]                 blk.39.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 695/ 843]            blk.39.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 696/ 843]            blk.39.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 697/ 843]                 blk.39.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 698/ 843]                 blk.39.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 699/ 843]          blk.39.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 700/ 843]          blk.39.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 701/ 843]            blk.39.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 702/ 843]              blk.40.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 703/ 843]                         blk.40.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 704/ 843]             blk.40.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 705/ 843]                   blk.40.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 706/ 843]                 blk.40.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 707/ 843]               blk.40.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 708/ 843]              blk.40.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 709/ 843]               blk.40.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 710/ 843]                blk.40.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 711/ 843]          blk.40.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 712/ 843]          blk.40.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 713/ 843]            blk.40.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 714/ 843]           blk.40.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 715/ 843]         blk.40.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 716/ 843]         blk.40.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 717/ 843]           blk.40.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 718/ 843]     blk.40.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 719/ 843]    blk.40.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 720/ 843]              blk.41.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 721/ 843]                         blk.41.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 722/ 843]             blk.41.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 723/ 843]                   blk.41.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 724/ 843]                 blk.41.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 725/ 843]               blk.41.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 726/ 843]              blk.41.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 727/ 843]               blk.41.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 728/ 843]                blk.41.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 729/ 843]           blk.41.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 730/ 843]         blk.41.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 731/ 843]         blk.41.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 732/ 843]           blk.41.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 733/ 843]     blk.41.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 734/ 843]    blk.41.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 735/ 843]          blk.41.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 736/ 843]          blk.41.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 737/ 843]            blk.41.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 738/ 843]              blk.42.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 739/ 843]                         blk.42.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 740/ 843]             blk.42.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 741/ 843]                   blk.42.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 742/ 843]                 blk.42.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 743/ 843]               blk.42.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 744/ 843]              blk.42.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 745/ 843]               blk.42.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 746/ 843]                blk.42.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 747/ 843]           blk.42.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 748/ 843]         blk.42.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 749/ 843]         blk.42.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 750/ 843]           blk.42.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 751/ 843]     blk.42.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 752/ 843]    blk.42.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 753/ 843]          blk.42.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 754/ 843]          blk.42.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 755/ 843]            blk.42.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 756/ 843]              blk.43.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 757/ 843]           blk.43.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 758/ 843]         blk.43.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 759/ 843]         blk.43.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 760/ 843]           blk.43.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 761/ 843]     blk.43.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 762/ 843]    blk.43.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 763/ 843]            blk.43.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 764/ 843]                 blk.43.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 765/ 843]            blk.43.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 766/ 843]            blk.43.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 767/ 843]                 blk.43.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 768/ 843]                 blk.43.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 769/ 843]          blk.43.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 770/ 843]          blk.43.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 771/ 843]            blk.43.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 772/ 843]              blk.44.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 773/ 843]                         blk.44.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 774/ 843]             blk.44.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 775/ 843]                   blk.44.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 776/ 843]                 blk.44.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 777/ 843]               blk.44.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 778/ 843]              blk.44.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 779/ 843]               blk.44.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 780/ 843]                blk.44.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 781/ 843]           blk.44.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 782/ 843]         blk.44.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 783/ 843]         blk.44.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 784/ 843]           blk.44.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 785/ 843]     blk.44.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 786/ 843]    blk.44.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 787/ 843]          blk.44.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 788/ 843]          blk.44.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 789/ 843]            blk.44.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 790/ 843]              blk.45.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 791/ 843]                         blk.45.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 792/ 843]             blk.45.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 793/ 843]                   blk.45.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 794/ 843]                 blk.45.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 795/ 843]               blk.45.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 796/ 843]              blk.45.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 797/ 843]               blk.45.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 798/ 843]                blk.45.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 799/ 843]           blk.45.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 800/ 843]         blk.45.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 801/ 843]         blk.45.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 802/ 843]           blk.45.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 803/ 843]     blk.45.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 804/ 843]    blk.45.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 805/ 843]          blk.45.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 806/ 843]          blk.45.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 807/ 843]            blk.45.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 808/ 843]              blk.46.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 809/ 843]                         blk.46.ssm_a - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 810/ 843]             blk.46.ssm_conv1d.weight - [    4,  8192,     1,     1], type =    f32, size =    0.125 MB
+[ 811/ 843]                   blk.46.ssm_dt.bias - [   32,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 812/ 843]                 blk.46.ssm_ba.weight - [ 2048,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.25 MiB ->     0.13 MiB
+[ 813/ 843]               blk.46.attn_qkv.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 814/ 843]              blk.46.attn_gate.weight - [ 2048,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 815/ 843]               blk.46.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MB
+[ 816/ 843]                blk.46.ssm_out.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 817/ 843]          blk.46.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 818/ 843]          blk.46.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 819/ 843]            blk.46.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 820/ 843]           blk.46.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 821/ 843]         blk.46.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 822/ 843]         blk.46.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 823/ 843]           blk.46.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 824/ 843]     blk.46.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 825/ 843]    blk.46.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 826/ 843]              blk.47.attn_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 827/ 843]           blk.47.ffn_gate_inp.weight - [ 2048,   512,     1,     1], type =    f32, size =    4.000 MB
+[ 828/ 843]         blk.47.ffn_down_shexp.weight - [  512,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 829/ 843]         blk.47.ffn_gate_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 830/ 843]           blk.47.ffn_up_shexp.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 831/ 843]     blk.47.ffn_gate_inp_shexp.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 832/ 843]    blk.47.post_attention_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+[ 833/ 843]            blk.47.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 834/ 843]                 blk.47.attn_k.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 835/ 843]            blk.47.attn_output.weight - [ 4096,  2048,     1,     1], type =   bf16, converting to q8_0 .. size =    16.00 MiB ->     8.50 MiB
+[ 836/ 843]            blk.47.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MB
+[ 837/ 843]                 blk.47.attn_q.weight - [ 2048,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    32.00 MiB ->    17.00 MiB
+[ 838/ 843]                 blk.47.attn_v.weight - [ 2048,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     2.00 MiB ->     1.06 MiB
+[ 839/ 843]                        output.weight - [ 2048, 151936,     1,     1], type =   bf16, converting to q8_0 .. size =   593.50 MiB ->   315.30 MiB
+[ 840/ 843]          blk.47.ffn_down_exps.weight - [  512,  2048,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 841/ 843]          blk.47.ffn_gate_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 842/ 843]            blk.47.ffn_up_exps.weight - [ 2048,   512,   512,     1], type =   bf16, converting to q8_0 .. size =  1024.00 MiB ->   544.00 MiB
+[ 843/ 843]                   output_norm.weight - [ 2048,     1,     1,     1], type =    f32, size =    0.008 MB
+llama_model_quantize_internal: model size  = 152065.68 MB
+llama_model_quantize_internal: quant size  = 80877.56 MB
+
+main: quantize time = 183367.66 ms
+main:    total time = 183367.66 ms