diff --git "a/logs/quantize-Qwen3-Coder-Next-Q8_0.log" "b/logs/quantize-Qwen3-Coder-Next-Q8_0.log" new file mode 100644--- /dev/null +++ "b/logs/quantize-Qwen3-Coder-Next-Q8_0.log" @@ -0,0 +1,910 @@ +numactl -N ${SOCKET} -m ${SOCKET} \ +./build/bin/llama-quantize \ + --pure \ + /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf \ + /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-Q8_0.gguf \ + Q8_0 \ + 128 + +main: build = 4211 (b2cb4512) +main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu +main: quantizing '/mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf' to '/mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3.5-Coder-Next-Q8_0.gguf' as Q8_0 using 128 threads +llama_model_loader: additional 3 GGUFs metadata loaded. +llama_model_loader: loaded meta data with 46 key-value pairs and 843 tensors from /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-512x2.5B-BF16-00001-of-00004.gguf (version GGUF V3 (latest)) +llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. +llama_model_loader: - kv 0: general.architecture str = qwen3next +llama_model_loader: - kv 1: general.type str = model +llama_model_loader: - kv 2: general.sampling.top_k i32 = 40 +llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000 +llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000 +llama_model_loader: - kv 5: general.name str = Qwen3 Coder Next +llama_model_loader: - kv 6: general.size_label str = 512x2.5B +llama_model_loader: - kv 7: general.license str = apache-2.0 +llama_model_loader: - kv 8: general.license.link str = https://huggingface.co/Qwen/Qwen3-Cod... +llama_model_loader: - kv 9: general.tags arr[str,1] = ["text-generation"] +llama_model_loader: - kv 10: qwen3next.block_count u32 = 48 +llama_model_loader: - kv 11: qwen3next.context_length u32 = 262144 +llama_model_loader: - kv 12: qwen3next.embedding_length u32 = 2048 +llama_model_loader: - kv 13: qwen3next.feed_forward_length u32 = 5120 +llama_model_loader: - kv 14: qwen3next.attention.head_count u32 = 16 +llama_model_loader: - kv 15: qwen3next.attention.head_count_kv u32 = 2 +llama_model_loader: - kv 16: qwen3next.rope.freq_base f32 = 5000000.000000 +llama_model_loader: - kv 17: qwen3next.attention.layer_norm_rms_epsilon f32 = 0.000001 +llama_model_loader: - kv 18: qwen3next.expert_count u32 = 512 +llama_model_loader: - kv 19: qwen3next.expert_used_count u32 = 10 +llama_model_loader: - kv 20: qwen3next.attention.key_length u32 = 256 +llama_model_loader: - kv 21: qwen3next.attention.value_length u32 = 256 +llama_model_loader: - kv 22: general.file_type u32 = 32 +llama_model_loader: - kv 23: qwen3next.expert_feed_forward_length u32 = 512 +llama_model_loader: - kv 24: qwen3next.expert_shared_feed_forward_length u32 = 512 +llama_model_loader: - kv 25: qwen3next.ssm.conv_kernel u32 = 4 +llama_model_loader: - kv 26: qwen3next.ssm.state_size u32 = 128 +llama_model_loader: - kv 27: qwen3next.ssm.group_count u32 = 16 +llama_model_loader: - kv 28: qwen3next.ssm.time_step_rank u32 = 32 +llama_model_loader: - kv 29: qwen3next.ssm.inner_size u32 = 4096 +llama_model_loader: - kv 30: qwen3next.full_attention_interval u32 = 4 +llama_model_loader: - kv 31: qwen3next.rope.dimension_count u32 = 64 +llama_model_loader: - kv 32: general.quantization_version u32 = 2 +llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2 +llama_model_loader: - kv 34: tokenizer.ggml.pre str = qwen2 +llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ... +llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... +llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... +llama_model_loader: - kv 38: tokenizer.ggml.eos_token_id u32 = 151645 +llama_model_loader: - kv 39: tokenizer.ggml.padding_token_id u32 = 151643 +llama_model_loader: - kv 40: tokenizer.ggml.bos_token_id u32 = 151643 +llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = false +llama_model_loader: - kv 42: tokenizer.chat_template str = {% macro render_extra_keys(json_dict,... +llama_model_loader: - kv 43: split.no u16 = 0 +llama_model_loader: - kv 44: split.count u16 = 4 +llama_model_loader: - kv 45: split.tensors.count i32 = 843 +llama_model_loader: - type f32: 361 tensors +llama_model_loader: - type bf16: 482 tensors +[ 1/ 843] token_embd.weight - [ 2048, 151936, 1, 1], type = bf16, converting to q8_0 .. size = 593.50 MiB -> 315.30 MiB +[ 2/ 843] blk.0.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 3/ 843] blk.0.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 4/ 843] blk.0.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 5/ 843] blk.0.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 6/ 843] blk.0.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 7/ 843] blk.0.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 8/ 843] blk.0.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 9/ 843] blk.0.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 10/ 843] blk.0.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 11/ 843] blk.0.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 12/ 843] blk.0.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 13/ 843] blk.0.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 14/ 843] blk.0.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 15/ 843] blk.0.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 16/ 843] blk.0.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 17/ 843] blk.0.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 18/ 843] blk.0.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 19/ 843] blk.0.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 20/ 843] blk.1.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 21/ 843] blk.1.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 22/ 843] blk.1.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 23/ 843] blk.1.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 24/ 843] blk.1.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 25/ 843] blk.1.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 26/ 843] blk.1.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 27/ 843] blk.1.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 28/ 843] blk.1.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 29/ 843] blk.1.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 30/ 843] blk.1.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 31/ 843] blk.1.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 32/ 843] blk.1.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 33/ 843] blk.1.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 34/ 843] blk.1.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 35/ 843] blk.1.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 36/ 843] blk.1.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 37/ 843] blk.1.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 38/ 843] blk.2.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 39/ 843] blk.2.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 40/ 843] blk.2.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 41/ 843] blk.2.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 42/ 843] blk.2.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 43/ 843] blk.2.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 44/ 843] blk.2.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 45/ 843] blk.2.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 46/ 843] blk.2.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 47/ 843] blk.2.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 48/ 843] blk.2.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 49/ 843] blk.2.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 50/ 843] blk.2.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 51/ 843] blk.2.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 52/ 843] blk.2.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 53/ 843] blk.2.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 54/ 843] blk.2.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 55/ 843] blk.2.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 56/ 843] blk.3.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 57/ 843] blk.3.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 58/ 843] blk.3.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 59/ 843] blk.3.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 60/ 843] blk.3.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 61/ 843] blk.3.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 62/ 843] blk.3.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 63/ 843] blk.3.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 64/ 843] blk.3.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 65/ 843] blk.3.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 66/ 843] blk.3.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 67/ 843] blk.3.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 68/ 843] blk.3.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 69/ 843] blk.3.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 70/ 843] blk.3.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 71/ 843] blk.3.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 72/ 843] blk.4.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 73/ 843] blk.4.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 74/ 843] blk.4.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 75/ 843] blk.4.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 76/ 843] blk.4.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 77/ 843] blk.4.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 78/ 843] blk.4.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 79/ 843] blk.4.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 80/ 843] blk.4.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 81/ 843] blk.4.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 82/ 843] blk.4.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 83/ 843] blk.4.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 84/ 843] blk.4.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 85/ 843] blk.4.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 86/ 843] blk.4.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 87/ 843] blk.4.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 88/ 843] blk.4.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 89/ 843] blk.4.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 90/ 843] blk.5.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 91/ 843] blk.5.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 92/ 843] blk.5.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 93/ 843] blk.5.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 94/ 843] blk.5.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 95/ 843] blk.5.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 96/ 843] blk.5.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 97/ 843] blk.5.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 98/ 843] blk.5.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 99/ 843] blk.5.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 100/ 843] blk.5.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 101/ 843] blk.5.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 102/ 843] blk.5.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 103/ 843] blk.5.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 104/ 843] blk.5.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 105/ 843] blk.5.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 106/ 843] blk.5.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 107/ 843] blk.5.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 108/ 843] blk.6.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 109/ 843] blk.6.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 110/ 843] blk.6.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 111/ 843] blk.6.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 112/ 843] blk.6.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 113/ 843] blk.6.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 114/ 843] blk.6.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 115/ 843] blk.6.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 116/ 843] blk.6.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 117/ 843] blk.6.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 118/ 843] blk.6.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 119/ 843] blk.6.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 120/ 843] blk.6.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 121/ 843] blk.6.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 122/ 843] blk.6.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 123/ 843] blk.6.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 124/ 843] blk.6.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 125/ 843] blk.6.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 126/ 843] blk.7.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 127/ 843] blk.7.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 128/ 843] blk.7.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 129/ 843] blk.7.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 130/ 843] blk.7.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 131/ 843] blk.7.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 132/ 843] blk.7.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 133/ 843] blk.7.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 134/ 843] blk.7.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 135/ 843] blk.7.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 136/ 843] blk.7.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 137/ 843] blk.7.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 138/ 843] blk.7.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 139/ 843] blk.7.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 140/ 843] blk.7.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 141/ 843] blk.7.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 142/ 843] blk.8.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 143/ 843] blk.8.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 144/ 843] blk.8.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 145/ 843] blk.8.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 146/ 843] blk.8.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 147/ 843] blk.8.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 148/ 843] blk.8.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 149/ 843] blk.8.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 150/ 843] blk.8.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 151/ 843] blk.8.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 152/ 843] blk.8.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 153/ 843] blk.8.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 154/ 843] blk.8.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 155/ 843] blk.8.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 156/ 843] blk.8.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 157/ 843] blk.8.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 158/ 843] blk.8.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 159/ 843] blk.8.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 160/ 843] blk.9.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 161/ 843] blk.9.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 162/ 843] blk.9.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 163/ 843] blk.9.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 164/ 843] blk.9.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 165/ 843] blk.9.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 166/ 843] blk.9.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 167/ 843] blk.9.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 168/ 843] blk.9.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 169/ 843] blk.9.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 170/ 843] blk.9.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 171/ 843] blk.9.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 172/ 843] blk.9.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 173/ 843] blk.9.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 174/ 843] blk.9.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 175/ 843] blk.10.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 176/ 843] blk.10.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 177/ 843] blk.10.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 178/ 843] blk.10.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 179/ 843] blk.10.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 180/ 843] blk.10.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 181/ 843] blk.10.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 182/ 843] blk.10.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 183/ 843] blk.10.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 184/ 843] blk.10.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 185/ 843] blk.10.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 186/ 843] blk.10.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 187/ 843] blk.10.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 188/ 843] blk.10.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 189/ 843] blk.10.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 190/ 843] blk.9.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 191/ 843] blk.9.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 192/ 843] blk.9.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 193/ 843] blk.10.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 194/ 843] blk.10.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 195/ 843] blk.10.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 196/ 843] blk.11.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 197/ 843] blk.11.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 198/ 843] blk.11.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 199/ 843] blk.11.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 200/ 843] blk.11.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 201/ 843] blk.11.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 202/ 843] blk.11.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 203/ 843] blk.11.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 204/ 843] blk.11.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 205/ 843] blk.11.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 206/ 843] blk.11.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 207/ 843] blk.11.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 208/ 843] blk.11.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 209/ 843] blk.11.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 210/ 843] blk.11.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 211/ 843] blk.11.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 212/ 843] blk.12.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 213/ 843] blk.12.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 214/ 843] blk.12.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 215/ 843] blk.12.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 216/ 843] blk.12.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 217/ 843] blk.12.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 218/ 843] blk.12.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 219/ 843] blk.12.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 220/ 843] blk.12.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 221/ 843] blk.12.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 222/ 843] blk.12.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 223/ 843] blk.12.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 224/ 843] blk.12.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 225/ 843] blk.12.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 226/ 843] blk.12.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 227/ 843] blk.12.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 228/ 843] blk.12.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 229/ 843] blk.12.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 230/ 843] blk.13.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 231/ 843] blk.13.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 232/ 843] blk.13.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 233/ 843] blk.13.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 234/ 843] blk.13.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 235/ 843] blk.13.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 236/ 843] blk.13.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 237/ 843] blk.13.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 238/ 843] blk.13.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 239/ 843] blk.13.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 240/ 843] blk.13.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 241/ 843] blk.13.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 242/ 843] blk.13.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 243/ 843] blk.13.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 244/ 843] blk.13.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 245/ 843] blk.13.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 246/ 843] blk.13.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 247/ 843] blk.13.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 248/ 843] blk.14.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 249/ 843] blk.14.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 250/ 843] blk.14.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 251/ 843] blk.14.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 252/ 843] blk.14.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 253/ 843] blk.14.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 254/ 843] blk.14.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 255/ 843] blk.14.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 256/ 843] blk.14.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 257/ 843] blk.14.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 258/ 843] blk.14.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 259/ 843] blk.14.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 260/ 843] blk.14.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 261/ 843] blk.14.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 262/ 843] blk.14.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 263/ 843] blk.14.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 264/ 843] blk.14.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 265/ 843] blk.14.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 266/ 843] blk.15.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 267/ 843] blk.15.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 268/ 843] blk.15.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 269/ 843] blk.15.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 270/ 843] blk.15.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 271/ 843] blk.15.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 272/ 843] blk.15.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 273/ 843] blk.15.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 274/ 843] blk.15.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 275/ 843] blk.15.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 276/ 843] blk.15.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 277/ 843] blk.15.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 278/ 843] blk.15.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 279/ 843] blk.15.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 280/ 843] blk.15.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 281/ 843] blk.15.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 282/ 843] blk.16.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 283/ 843] blk.16.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 284/ 843] blk.16.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 285/ 843] blk.16.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 286/ 843] blk.16.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 287/ 843] blk.16.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 288/ 843] blk.16.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 289/ 843] blk.16.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 290/ 843] blk.16.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 291/ 843] blk.16.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 292/ 843] blk.16.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 293/ 843] blk.16.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 294/ 843] blk.16.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 295/ 843] blk.16.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 296/ 843] blk.16.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 297/ 843] blk.16.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 298/ 843] blk.16.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 299/ 843] blk.16.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 300/ 843] blk.17.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 301/ 843] blk.17.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 302/ 843] blk.17.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 303/ 843] blk.17.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 304/ 843] blk.17.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 305/ 843] blk.17.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 306/ 843] blk.17.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 307/ 843] blk.17.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 308/ 843] blk.17.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 309/ 843] blk.17.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 310/ 843] blk.17.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 311/ 843] blk.17.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 312/ 843] blk.17.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 313/ 843] blk.17.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 314/ 843] blk.17.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 315/ 843] blk.17.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 316/ 843] blk.17.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 317/ 843] blk.17.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 318/ 843] blk.18.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 319/ 843] blk.18.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 320/ 843] blk.18.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 321/ 843] blk.18.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 322/ 843] blk.18.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 323/ 843] blk.18.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 324/ 843] blk.18.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 325/ 843] blk.18.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 326/ 843] blk.18.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 327/ 843] blk.18.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 328/ 843] blk.18.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 329/ 843] blk.18.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 330/ 843] blk.18.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 331/ 843] blk.18.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 332/ 843] blk.18.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 333/ 843] blk.18.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 334/ 843] blk.18.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 335/ 843] blk.18.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 336/ 843] blk.19.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 337/ 843] blk.19.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 338/ 843] blk.19.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 339/ 843] blk.19.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 340/ 843] blk.19.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 341/ 843] blk.19.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 342/ 843] blk.19.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 343/ 843] blk.19.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 344/ 843] blk.19.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 345/ 843] blk.19.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 346/ 843] blk.19.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 347/ 843] blk.19.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 348/ 843] blk.19.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 349/ 843] blk.19.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 350/ 843] blk.19.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 351/ 843] blk.19.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 352/ 843] blk.20.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 353/ 843] blk.20.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 354/ 843] blk.20.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 355/ 843] blk.20.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 356/ 843] blk.20.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 357/ 843] blk.20.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 358/ 843] blk.20.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 359/ 843] blk.20.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 360/ 843] blk.20.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 361/ 843] blk.20.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 362/ 843] blk.20.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 363/ 843] blk.20.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 364/ 843] blk.20.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 365/ 843] blk.20.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 366/ 843] blk.20.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 367/ 843] blk.20.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 368/ 843] blk.20.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 369/ 843] blk.20.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 370/ 843] blk.21.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 371/ 843] blk.21.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 372/ 843] blk.21.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 373/ 843] blk.21.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 374/ 843] blk.21.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 375/ 843] blk.21.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 376/ 843] blk.21.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 377/ 843] blk.21.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 378/ 843] blk.21.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 379/ 843] blk.21.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 380/ 843] blk.21.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 381/ 843] blk.21.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 382/ 843] blk.21.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 383/ 843] blk.21.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 384/ 843] blk.21.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 385/ 843] blk.21.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 386/ 843] blk.21.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 387/ 843] blk.21.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 388/ 843] blk.22.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 389/ 843] blk.22.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 390/ 843] blk.22.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 391/ 843] blk.22.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 392/ 843] blk.22.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 393/ 843] blk.22.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 394/ 843] blk.22.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 395/ 843] blk.22.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 396/ 843] blk.22.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 397/ 843] blk.22.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 398/ 843] blk.22.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 399/ 843] blk.22.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 400/ 843] blk.22.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 401/ 843] blk.22.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 402/ 843] blk.22.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 403/ 843] blk.22.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 404/ 843] blk.22.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 405/ 843] blk.22.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 406/ 843] blk.23.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 407/ 843] blk.23.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 408/ 843] blk.23.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 409/ 843] blk.23.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 410/ 843] blk.23.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 411/ 843] blk.23.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 412/ 843] blk.23.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 413/ 843] blk.23.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 414/ 843] blk.23.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 415/ 843] blk.23.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 416/ 843] blk.23.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 417/ 843] blk.23.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 418/ 843] blk.23.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 419/ 843] blk.23.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 420/ 843] blk.23.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 421/ 843] blk.23.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 422/ 843] blk.24.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 423/ 843] blk.24.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 424/ 843] blk.24.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 425/ 843] blk.24.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 426/ 843] blk.24.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 427/ 843] blk.24.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 428/ 843] blk.24.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 429/ 843] blk.24.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 430/ 843] blk.24.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 431/ 843] blk.24.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 432/ 843] blk.24.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 433/ 843] blk.24.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 434/ 843] blk.24.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 435/ 843] blk.24.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 436/ 843] blk.24.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 437/ 843] blk.24.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 438/ 843] blk.24.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 439/ 843] blk.24.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 440/ 843] blk.25.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 441/ 843] blk.25.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 442/ 843] blk.25.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 443/ 843] blk.25.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 444/ 843] blk.25.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 445/ 843] blk.25.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 446/ 843] blk.25.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 447/ 843] blk.25.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 448/ 843] blk.25.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 449/ 843] blk.25.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 450/ 843] blk.25.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 451/ 843] blk.25.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 452/ 843] blk.25.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 453/ 843] blk.25.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 454/ 843] blk.25.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 455/ 843] blk.25.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 456/ 843] blk.25.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 457/ 843] blk.25.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 458/ 843] blk.26.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 459/ 843] blk.26.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 460/ 843] blk.26.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 461/ 843] blk.26.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 462/ 843] blk.26.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 463/ 843] blk.26.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 464/ 843] blk.26.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 465/ 843] blk.26.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 466/ 843] blk.26.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 467/ 843] blk.26.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 468/ 843] blk.26.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 469/ 843] blk.26.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 470/ 843] blk.26.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 471/ 843] blk.26.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 472/ 843] blk.26.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 473/ 843] blk.26.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 474/ 843] blk.26.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 475/ 843] blk.26.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 476/ 843] blk.27.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 477/ 843] blk.27.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 478/ 843] blk.27.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 479/ 843] blk.27.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 480/ 843] blk.27.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 481/ 843] blk.27.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 482/ 843] blk.27.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 483/ 843] blk.27.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 484/ 843] blk.27.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 485/ 843] blk.27.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 486/ 843] blk.27.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 487/ 843] blk.27.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 488/ 843] blk.27.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 489/ 843] blk.27.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 490/ 843] blk.27.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 491/ 843] blk.27.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 492/ 843] blk.28.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 493/ 843] blk.28.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 494/ 843] blk.28.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 495/ 843] blk.28.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 496/ 843] blk.28.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 497/ 843] blk.28.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 498/ 843] blk.28.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 499/ 843] blk.28.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 500/ 843] blk.28.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 501/ 843] blk.28.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 502/ 843] blk.28.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 503/ 843] blk.28.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 504/ 843] blk.28.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 505/ 843] blk.28.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 506/ 843] blk.28.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 507/ 843] blk.28.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 508/ 843] blk.28.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 509/ 843] blk.28.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 510/ 843] blk.29.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 511/ 843] blk.29.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 512/ 843] blk.29.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 513/ 843] blk.29.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 514/ 843] blk.29.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 515/ 843] blk.29.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 516/ 843] blk.29.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 517/ 843] blk.29.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 518/ 843] blk.29.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 519/ 843] blk.29.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 520/ 843] blk.29.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 521/ 843] blk.29.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 522/ 843] blk.29.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 523/ 843] blk.29.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 524/ 843] blk.29.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 525/ 843] blk.29.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 526/ 843] blk.29.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 527/ 843] blk.29.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 528/ 843] blk.30.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 529/ 843] blk.30.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 530/ 843] blk.30.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 531/ 843] blk.30.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 532/ 843] blk.30.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 533/ 843] blk.30.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 534/ 843] blk.30.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 535/ 843] blk.30.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 536/ 843] blk.30.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 537/ 843] blk.30.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 538/ 843] blk.30.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 539/ 843] blk.30.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 540/ 843] blk.30.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 541/ 843] blk.30.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 542/ 843] blk.30.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 543/ 843] blk.30.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 544/ 843] blk.30.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 545/ 843] blk.30.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 546/ 843] blk.31.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 547/ 843] blk.31.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 548/ 843] blk.31.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 549/ 843] blk.31.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 550/ 843] blk.31.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 551/ 843] blk.31.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 552/ 843] blk.31.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 553/ 843] blk.31.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 554/ 843] blk.31.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 555/ 843] blk.31.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 556/ 843] blk.31.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 557/ 843] blk.31.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 558/ 843] blk.31.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 559/ 843] blk.31.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 560/ 843] blk.31.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 561/ 843] blk.31.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 562/ 843] blk.32.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 563/ 843] blk.32.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 564/ 843] blk.32.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 565/ 843] blk.32.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 566/ 843] blk.32.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 567/ 843] blk.32.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 568/ 843] blk.32.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 569/ 843] blk.32.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 570/ 843] blk.32.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 571/ 843] blk.32.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 572/ 843] blk.32.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 573/ 843] blk.32.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 574/ 843] blk.32.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 575/ 843] blk.32.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 576/ 843] blk.32.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 577/ 843] blk.32.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 578/ 843] blk.32.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 579/ 843] blk.32.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 580/ 843] blk.33.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 581/ 843] blk.33.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 582/ 843] blk.33.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 583/ 843] blk.33.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 584/ 843] blk.33.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 585/ 843] blk.33.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 586/ 843] blk.33.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 587/ 843] blk.33.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 588/ 843] blk.33.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 589/ 843] blk.33.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 590/ 843] blk.33.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 591/ 843] blk.33.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 592/ 843] blk.33.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 593/ 843] blk.33.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 594/ 843] blk.33.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 595/ 843] blk.33.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 596/ 843] blk.33.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 597/ 843] blk.33.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 598/ 843] blk.34.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 599/ 843] blk.34.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 600/ 843] blk.34.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 601/ 843] blk.34.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 602/ 843] blk.34.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 603/ 843] blk.34.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 604/ 843] blk.34.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 605/ 843] blk.34.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 606/ 843] blk.34.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 607/ 843] blk.34.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 608/ 843] blk.34.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 609/ 843] blk.34.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 610/ 843] blk.34.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 611/ 843] blk.34.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 612/ 843] blk.34.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 613/ 843] blk.34.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 614/ 843] blk.34.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 615/ 843] blk.34.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 616/ 843] blk.35.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 617/ 843] blk.35.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 618/ 843] blk.35.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 619/ 843] blk.35.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 620/ 843] blk.35.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 621/ 843] blk.35.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 622/ 843] blk.35.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 623/ 843] blk.35.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 624/ 843] blk.35.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 625/ 843] blk.35.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 626/ 843] blk.35.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 627/ 843] blk.35.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 628/ 843] blk.35.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 629/ 843] blk.35.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 630/ 843] blk.35.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 631/ 843] blk.35.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 632/ 843] blk.36.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 633/ 843] blk.36.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 634/ 843] blk.36.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 635/ 843] blk.36.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 636/ 843] blk.36.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 637/ 843] blk.36.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 638/ 843] blk.36.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 639/ 843] blk.36.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 640/ 843] blk.36.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 641/ 843] blk.36.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 642/ 843] blk.36.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 643/ 843] blk.36.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 644/ 843] blk.36.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 645/ 843] blk.36.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 646/ 843] blk.36.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 647/ 843] blk.36.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 648/ 843] blk.36.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 649/ 843] blk.36.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 650/ 843] blk.37.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 651/ 843] blk.37.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 652/ 843] blk.37.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 653/ 843] blk.37.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 654/ 843] blk.37.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 655/ 843] blk.37.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 656/ 843] blk.37.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 657/ 843] blk.37.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 658/ 843] blk.37.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 659/ 843] blk.37.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 660/ 843] blk.37.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 661/ 843] blk.37.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 662/ 843] blk.37.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 663/ 843] blk.37.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 664/ 843] blk.37.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 665/ 843] blk.37.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 666/ 843] blk.37.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 667/ 843] blk.37.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 668/ 843] blk.38.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 669/ 843] blk.38.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 670/ 843] blk.38.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 671/ 843] blk.38.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 672/ 843] blk.38.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 673/ 843] blk.38.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 674/ 843] blk.38.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 675/ 843] blk.38.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 676/ 843] blk.38.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 677/ 843] blk.38.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 678/ 843] blk.38.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 679/ 843] blk.38.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 680/ 843] blk.38.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 681/ 843] blk.38.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 682/ 843] blk.38.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 683/ 843] blk.38.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 684/ 843] blk.38.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 685/ 843] blk.38.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 686/ 843] blk.39.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 687/ 843] blk.39.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 688/ 843] blk.39.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 689/ 843] blk.39.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 690/ 843] blk.39.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 691/ 843] blk.39.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 692/ 843] blk.39.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 693/ 843] blk.39.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 694/ 843] blk.39.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 695/ 843] blk.39.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 696/ 843] blk.39.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 697/ 843] blk.39.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 698/ 843] blk.39.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 699/ 843] blk.39.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 700/ 843] blk.39.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 701/ 843] blk.39.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 702/ 843] blk.40.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 703/ 843] blk.40.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 704/ 843] blk.40.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 705/ 843] blk.40.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 706/ 843] blk.40.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 707/ 843] blk.40.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 708/ 843] blk.40.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 709/ 843] blk.40.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 710/ 843] blk.40.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 711/ 843] blk.40.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 712/ 843] blk.40.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 713/ 843] blk.40.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 714/ 843] blk.40.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 715/ 843] blk.40.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 716/ 843] blk.40.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 717/ 843] blk.40.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 718/ 843] blk.40.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 719/ 843] blk.40.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 720/ 843] blk.41.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 721/ 843] blk.41.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 722/ 843] blk.41.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 723/ 843] blk.41.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 724/ 843] blk.41.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 725/ 843] blk.41.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 726/ 843] blk.41.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 727/ 843] blk.41.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 728/ 843] blk.41.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 729/ 843] blk.41.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 730/ 843] blk.41.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 731/ 843] blk.41.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 732/ 843] blk.41.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 733/ 843] blk.41.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 734/ 843] blk.41.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 735/ 843] blk.41.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 736/ 843] blk.41.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 737/ 843] blk.41.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 738/ 843] blk.42.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 739/ 843] blk.42.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 740/ 843] blk.42.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 741/ 843] blk.42.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 742/ 843] blk.42.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 743/ 843] blk.42.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 744/ 843] blk.42.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 745/ 843] blk.42.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 746/ 843] blk.42.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 747/ 843] blk.42.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 748/ 843] blk.42.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 749/ 843] blk.42.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 750/ 843] blk.42.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 751/ 843] blk.42.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 752/ 843] blk.42.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 753/ 843] blk.42.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 754/ 843] blk.42.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 755/ 843] blk.42.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 756/ 843] blk.43.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 757/ 843] blk.43.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 758/ 843] blk.43.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 759/ 843] blk.43.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 760/ 843] blk.43.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 761/ 843] blk.43.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 762/ 843] blk.43.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 763/ 843] blk.43.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 764/ 843] blk.43.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 765/ 843] blk.43.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 766/ 843] blk.43.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 767/ 843] blk.43.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 768/ 843] blk.43.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 769/ 843] blk.43.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 770/ 843] blk.43.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 771/ 843] blk.43.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 772/ 843] blk.44.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 773/ 843] blk.44.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 774/ 843] blk.44.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 775/ 843] blk.44.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 776/ 843] blk.44.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 777/ 843] blk.44.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 778/ 843] blk.44.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 779/ 843] blk.44.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 780/ 843] blk.44.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 781/ 843] blk.44.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 782/ 843] blk.44.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 783/ 843] blk.44.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 784/ 843] blk.44.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 785/ 843] blk.44.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 786/ 843] blk.44.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 787/ 843] blk.44.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 788/ 843] blk.44.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 789/ 843] blk.44.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 790/ 843] blk.45.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 791/ 843] blk.45.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 792/ 843] blk.45.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 793/ 843] blk.45.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 794/ 843] blk.45.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 795/ 843] blk.45.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 796/ 843] blk.45.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 797/ 843] blk.45.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 798/ 843] blk.45.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 799/ 843] blk.45.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 800/ 843] blk.45.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 801/ 843] blk.45.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 802/ 843] blk.45.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 803/ 843] blk.45.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 804/ 843] blk.45.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 805/ 843] blk.45.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 806/ 843] blk.45.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 807/ 843] blk.45.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 808/ 843] blk.46.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 809/ 843] blk.46.ssm_a - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 810/ 843] blk.46.ssm_conv1d.weight - [ 4, 8192, 1, 1], type = f32, size = 0.125 MB +[ 811/ 843] blk.46.ssm_dt.bias - [ 32, 1, 1, 1], type = f32, size = 0.000 MB +[ 812/ 843] blk.46.ssm_ba.weight - [ 2048, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.25 MiB -> 0.13 MiB +[ 813/ 843] blk.46.attn_qkv.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 814/ 843] blk.46.attn_gate.weight - [ 2048, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 815/ 843] blk.46.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MB +[ 816/ 843] blk.46.ssm_out.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 817/ 843] blk.46.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 818/ 843] blk.46.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 819/ 843] blk.46.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 820/ 843] blk.46.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 821/ 843] blk.46.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 822/ 843] blk.46.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 823/ 843] blk.46.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 824/ 843] blk.46.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 825/ 843] blk.46.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 826/ 843] blk.47.attn_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 827/ 843] blk.47.ffn_gate_inp.weight - [ 2048, 512, 1, 1], type = f32, size = 4.000 MB +[ 828/ 843] blk.47.ffn_down_shexp.weight - [ 512, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 829/ 843] blk.47.ffn_gate_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 830/ 843] blk.47.ffn_up_shexp.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 831/ 843] blk.47.ffn_gate_inp_shexp.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 832/ 843] blk.47.post_attention_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +[ 833/ 843] blk.47.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 834/ 843] blk.47.attn_k.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 835/ 843] blk.47.attn_output.weight - [ 4096, 2048, 1, 1], type = bf16, converting to q8_0 .. size = 16.00 MiB -> 8.50 MiB +[ 836/ 843] blk.47.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MB +[ 837/ 843] blk.47.attn_q.weight - [ 2048, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 32.00 MiB -> 17.00 MiB +[ 838/ 843] blk.47.attn_v.weight - [ 2048, 512, 1, 1], type = bf16, converting to q8_0 .. size = 2.00 MiB -> 1.06 MiB +[ 839/ 843] output.weight - [ 2048, 151936, 1, 1], type = bf16, converting to q8_0 .. size = 593.50 MiB -> 315.30 MiB +[ 840/ 843] blk.47.ffn_down_exps.weight - [ 512, 2048, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 841/ 843] blk.47.ffn_gate_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 842/ 843] blk.47.ffn_up_exps.weight - [ 2048, 512, 512, 1], type = bf16, converting to q8_0 .. size = 1024.00 MiB -> 544.00 MiB +[ 843/ 843] output_norm.weight - [ 2048, 1, 1, 1], type = f32, size = 0.008 MB +llama_model_quantize_internal: model size = 152065.68 MB +llama_model_quantize_internal: quant size = 80877.56 MB + +main: quantize time = 183367.66 ms +main: total time = 183367.66 ms