diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 68446aa44f9..d9ee390b383 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -7212,6 +7212,7 @@ def prepare_tensors(self): "DeepseekV3ForCausalLM", "KimiVLForConditionalGeneration", "YoutuForCausalLM", + "YoutuVLForConditionalGeneration" ) class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 @@ -9955,6 +9956,27 @@ def _is_audio_tensor(self, name: str): return any(p in name for p in ["audio", "codebook", "conformer", "depth_embedding", "depthformer", "depth_linear"]) +@ModelBase.register("Lfm2Model") +class LFM2ColBertModel(LFM2Model): + model_arch = gguf.MODEL_ARCH.LFM2 + dense_tensor_name = "dense_2" + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + if not name.startswith(self.dense_tensor_name): + name = "model." + name + + return super().modify_tensors(data_torch, name, bid) + + def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: + # dense tensor is stored in a separate safetensors file + from safetensors.torch import load_file + tensors_file = self.dir_model / "1_Dense" / "model.safetensors" + assert tensors_file.is_file() + tensor = load_file(tensors_file)["linear.weight"] + self.gguf_writer.add_embedding_length_out(tensor.shape[0]) + yield f"{self.dense_tensor_name}.weight", tensor.clone() + + @ModelBase.register("Lfm2MoeForCausalLM") class LFM2MoeModel(TextModel): model_arch = gguf.MODEL_ARCH.LFM2MOE @@ -10674,8 +10696,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] -@ModelBase.register("YOUTUVLForConditionalGeneration", "YOUTUVLForCausalLM") -class YOUTUVLVisionModel(MmprojModel): +@ModelBase.register("YoutuVLForConditionalGeneration") +class YoutuVLVisionModel(MmprojModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) assert self.hparams_vision is not None diff --git a/docs/ops.md b/docs/ops.md index 2b2770cb765..142f401d03a 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -22,7 +22,7 @@ Legend: | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ | ❌ | -| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | diff --git a/docs/ops/WebGPU.csv b/docs/ops/WebGPU.csv index bfff75e66f0..8cd7e12001e 100644 --- a/docs/ops/WebGPU.csv +++ b/docs/ops/WebGPU.csv @@ -35,8 +35,8 @@ "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" @@ -77,8 +77,8 @@ "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" @@ -119,8 +119,8 @@ "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" @@ -161,8 +161,8 @@ "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" @@ -965,6 +965,7 @@ "WebGPU: WebGPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","no","WebGPU" "WebGPU: WebGPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","no","WebGPU" "WebGPU: WebGPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","no","WebGPU" +"WebGPU: WebGPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","no","WebGPU" "WebGPU: WebGPU","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","WebGPU" "WebGPU: WebGPU","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","WebGPU" "WebGPU: WebGPU","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","WebGPU" @@ -4964,6 +4965,7 @@ "WebGPU: WebGPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","no","WebGPU" "WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","no","WebGPU" "WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","no","WebGPU" +"WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","0","no","WebGPU" "WebGPU: WebGPU","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","ARGMAX","type=f32,ne=[32,1,1,1]","support","0","no","WebGPU" @@ -5715,15 +5717,15 @@ "WebGPU: WebGPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","WebGPU" "WebGPU: WebGPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","0","no","WebGPU" @@ -5733,6 +5735,15 @@ "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","WebGPU" "WebGPU: WebGPU","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","WebGPU" @@ -8662,7 +8673,7 @@ "WebGPU: WebGPU","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","SQR","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8674,8 +8685,8 @@ "WebGPU: WebGPU","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8688,7 +8699,7 @@ "WebGPU: WebGPU","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","SQR","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8700,8 +8711,8 @@ "WebGPU: WebGPU","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLOOR","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","ROUND","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","TRUNC","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8916,6 +8927,8 @@ "WebGPU: WebGPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","WebGPU" "WebGPU: WebGPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","WebGPU" "WebGPU: WebGPU","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","WebGPU" @@ -8968,6 +8981,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" @@ -8977,6 +8991,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" @@ -8987,11 +9002,13 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" @@ -9001,6 +9018,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" @@ -9011,11 +9029,13 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" @@ -9025,6 +9045,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" @@ -9035,11 +9056,13 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" @@ -9049,6 +9072,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" @@ -9059,6 +9083,7 @@ "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROPE","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","1","yes","WebGPU" @@ -9184,6 +9209,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" @@ -9193,6 +9219,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" @@ -9203,11 +9230,13 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" @@ -9217,6 +9246,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" @@ -9227,11 +9257,13 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" @@ -9241,6 +9273,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" @@ -9251,11 +9284,13 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" @@ -9265,6 +9300,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" @@ -9275,6 +9311,7 @@ "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[128,16,2,1],n_dims=128,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" +"WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[16,16,8192,1],n_dims=16,mode=40,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" "WebGPU: WebGPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0,inplace=0","support","0","no","WebGPU" @@ -9542,333 +9579,333 @@ "WebGPU: WebGPU","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","0","no","WebGPU" "WebGPU: WebGPU","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","0","no","WebGPU" "WebGPU: WebGPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=nearest,flags=none","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=nearest","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=align_corners","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic,flags=align_corners","support","0","no","WebGPU" -"WebGPU: WebGPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic,flags=align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=0","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear|antialias,transpose=1","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|antialias","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear|antialias","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear|align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear|align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear|align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic|align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bicubic|align_corners","support","0","no","WebGPU" +"WebGPU: WebGPU","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bicubic|align_corners","support","0","no","WebGPU" "WebGPU: WebGPU","SUM","type=f32,ne=[10,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SUM_ROWS","type=f32,ne=[10,5,4,3],permute=0,slice=0","support","0","no","WebGPU" "WebGPU: WebGPU","SUM","type=f32,ne=[11,5,6,3],permute=[0,2,1,3]","support","0","no","WebGPU" @@ -9891,8 +9928,9 @@ "WebGPU: WebGPU","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","0","no","WebGPU" "WebGPU: WebGPU","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","0","no","WebGPU" "WebGPU: WebGPU","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","WebGPU" "WebGPU: WebGPU","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","WebGPU" "WebGPU: WebGPU","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","no","WebGPU" "WebGPU: WebGPU","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","WebGPU" @@ -9903,6 +9941,7 @@ "WebGPU: WebGPU","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[127,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[128,5,4,3]","support","0","no","WebGPU" +"WebGPU: WebGPU","CUMSUM","type=f32,ne=[128,128,4,4]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[255,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[256,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[511,5,4,3]","support","0","no","WebGPU" @@ -9922,17 +9961,41 @@ "WebGPU: WebGPU","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","WebGPU" "WebGPU: WebGPU","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","WebGPU" "WebGPU: WebGPU","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","WebGPU" diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 81111e81b2c..d8eaaa2691f 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -33,7 +33,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector & toke } } -static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) { +static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd_out, int embd_norm) { const enum llama_pooling_type pooling_type = llama_pooling_type(ctx); // clear previous kv_cache values (irrelevant for embeddings) @@ -65,8 +65,8 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu GGML_ASSERT(embd != NULL && "failed to get sequence embeddings"); } - float * out = output + embd_pos * n_embd; - common_embd_normalize(embd, out, n_embd, embd_norm); + float * out = output + embd_pos * n_embd_out; + common_embd_normalize(embd, out, n_embd_out, embd_norm); } } @@ -252,8 +252,8 @@ int main(int argc, char ** argv) { } // allocate output - const int n_embd = llama_model_n_embd(model); - std::vector embeddings(n_embd_count * n_embd, 0); + const int n_embd_out = llama_model_n_embd_out(model); + std::vector embeddings(n_embd_count * n_embd_out, 0); float * emb = embeddings.data(); // break into batches @@ -267,8 +267,8 @@ int main(int argc, char ** argv) { // encode if at capacity if (batch.n_tokens + n_toks > n_batch || s >= n_seq_max) { - float * out = emb + e * n_embd; - batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); + float * out = emb + e * n_embd_out; + batch_decode(ctx, batch, out, s, n_embd_out, params.embd_normalize); e += pooling_type == LLAMA_POOLING_TYPE_NONE ? batch.n_tokens : s; s = 0; common_batch_clear(batch); @@ -280,8 +280,8 @@ int main(int argc, char ** argv) { } // final batch - float * out = emb + e * n_embd; - batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); + float * out = emb + e * n_embd_out; + batch_decode(ctx, batch, out, s, n_embd_out, params.embd_normalize); if (params.embd_out.empty()) { LOG("\n"); @@ -289,19 +289,19 @@ int main(int argc, char ** argv) { if (pooling_type == LLAMA_POOLING_TYPE_NONE) { for (int j = 0; j < n_embd_count; j++) { LOG("embedding %d: ", j); - for (int i = 0; i < std::min(3, n_embd); i++) { + for (int i = 0; i < std::min(3, n_embd_out); i++) { if (params.embd_normalize == 0) { - LOG("%6.0f ", emb[j * n_embd + i]); + LOG("%6.0f ", emb[j * n_embd_out + i]); } else { - LOG("%9.6f ", emb[j * n_embd + i]); + LOG("%9.6f ", emb[j * n_embd_out + i]); } } LOG(" ... "); - for (int i = n_embd - 3; i < n_embd; i++) { + for (int i = n_embd_out - 3; i < n_embd_out; i++) { if (params.embd_normalize == 0) { - LOG("%6.0f ", emb[j * n_embd + i]); + LOG("%6.0f ", emb[j * n_embd_out + i]); } else { - LOG("%9.6f ", emb[j * n_embd + i]); + LOG("%9.6f ", emb[j * n_embd_out + i]); } } LOG("\n"); @@ -320,9 +320,9 @@ int main(int argc, char ** argv) { for (uint32_t i = 0; i < n_cls_out; i++) { // NOTE: if you change this log - update the tests in ci/run.sh if (n_cls_out == 1) { - LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]); + LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd_out]); } else { - LOG("rerank score %d: %8.3f [%s]\n", j, emb[j * n_embd + i], cls_out_labels[i].c_str()); + LOG("rerank score %d: %8.3f [%s]\n", j, emb[j * n_embd_out + i], cls_out_labels[i].c_str()); } } } @@ -330,11 +330,11 @@ int main(int argc, char ** argv) { // print the first part of the embeddings or for a single prompt, the full embedding for (int j = 0; j < n_prompts; j++) { LOG("embedding %d: ", j); - for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) { + for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd_out) : n_embd_out); i++) { if (params.embd_normalize == 0) { - LOG("%6.0f ", emb[j * n_embd + i]); + LOG("%6.0f ", emb[j * n_embd_out + i]); } else { - LOG("%9.6f ", emb[j * n_embd + i]); + LOG("%9.6f ", emb[j * n_embd_out + i]); } } LOG("\n"); @@ -350,7 +350,7 @@ int main(int argc, char ** argv) { LOG("\n"); for (int i = 0; i < n_prompts; i++) { for (int j = 0; j < n_prompts; j++) { - float sim = common_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd); + float sim = common_embd_similarity_cos(emb + i * n_embd_out, emb + j * n_embd_out, n_embd_out); LOG("%6.2f ", sim); } LOG("%1.10s", prompts[i].c_str()); @@ -368,9 +368,9 @@ int main(int argc, char ** argv) { if (notArray) LOG(" {\n \"object\": \"embedding\",\n \"index\": %d,\n \"embedding\": ",j); LOG("["); for (int i = 0;;) { // at least one iteration (n_embd > 0) - LOG(params.embd_normalize == 0 ? "%1.0f" : "%1.7f", emb[j * n_embd + i]); + LOG(params.embd_normalize == 0 ? "%1.0f" : "%1.7f", emb[j * n_embd_out + i]); i++; - if (i < n_embd) LOG(","); else break; + if (i < n_embd_out) LOG(","); else break; } LOG(notArray ? "]\n }" : "]"); j++; @@ -383,7 +383,7 @@ int main(int argc, char ** argv) { for (int i = 0;;) { // at least two iteration (n_embd_count > 1) LOG(" ["); for (int j = 0;;) { // at least two iteration (n_embd_count > 1) - float sim = common_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd); + float sim = common_embd_similarity_cos(emb + i * n_embd_out, emb + j * n_embd_out, n_embd_out); LOG("%6.2f", sim); j++; if (j < n_embd_count) LOG(", "); else break; @@ -397,7 +397,7 @@ int main(int argc, char ** argv) { if (notArray) LOG("\n}\n"); } else if (params.embd_out == "raw") { - print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); + print_raw_embeddings(emb, n_embd_count, n_embd_out, model, pooling_type, params.embd_normalize); } LOG("\n"); diff --git a/examples/model-conversion/logits.cpp b/examples/model-conversion/logits.cpp index 5bcf0632677..f71f772ab11 100644 --- a/examples/model-conversion/logits.cpp +++ b/examples/model-conversion/logits.cpp @@ -161,9 +161,9 @@ int main(int argc, char ** argv) { std::vector embd_out; if (embedding_mode) { - const int n_embd = llama_model_n_embd(model); + const int n_embd_out = llama_model_n_embd_out(model); const int n_embd_count = pooling_enabled ? 1 : batch.n_tokens; - const int n_embeddings = n_embd * n_embd_count; + const int n_embeddings = n_embd_out * n_embd_count; float * embeddings; type = "-embeddings"; @@ -177,7 +177,7 @@ int main(int argc, char ** argv) { embeddings = llama_get_embeddings(ctx); } - printf("Embedding dimension: %d\n", n_embd); + printf("Embedding dimension: %d\n", n_embd_out); printf("\n"); // Print embeddings in the specified format @@ -185,16 +185,16 @@ int main(int argc, char ** argv) { printf("embedding %d: ", j); // Print first 3 values - for (int i = 0; i < 3 && i < n_embd; i++) { - printf("%9.6f ", embeddings[j * n_embd + i]); + for (int i = 0; i < 3 && i < n_embd_out; i++) { + printf("%9.6f ", embeddings[j * n_embd_out + i]); } printf(" ... "); // Print last 3 values - for (int i = n_embd - 3; i < n_embd; i++) { + for (int i = n_embd_out - 3; i < n_embd_out; i++) { if (i >= 0) { - printf("%9.6f ", embeddings[j * n_embd + i]); + printf("%9.6f ", embeddings[j * n_embd_out + i]); } } diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp index 8f92ff90578..3f2afd4346e 100644 --- a/examples/retrieval/retrieval.cpp +++ b/examples/retrieval/retrieval.cpp @@ -217,8 +217,8 @@ int main(int argc, char ** argv) { struct llama_batch batch = llama_batch_init(n_batch, 0, 1); // allocate output - const int n_embd = llama_model_n_embd(model); - std::vector embeddings(n_chunks * n_embd, 0); + const int n_embd_out = llama_model_n_embd_out(model); + std::vector embeddings(n_chunks * n_embd_out, 0); float * emb = embeddings.data(); // break into batches @@ -232,8 +232,8 @@ int main(int argc, char ** argv) { // encode if at capacity if (batch.n_tokens + n_toks > n_batch || s >= llama_n_seq_max(ctx)) { - float * out = emb + p * n_embd; - batch_process(ctx, batch, out, s, n_embd); + float * out = emb + p * n_embd_out; + batch_process(ctx, batch, out, s, n_embd_out); common_batch_clear(batch); p += s; s = 0; @@ -245,12 +245,12 @@ int main(int argc, char ** argv) { } // final batch - float * out = emb + p * n_embd; - batch_process(ctx, batch, out, s, n_embd); + float * out = emb + p * n_embd_out; + batch_process(ctx, batch, out, s, n_embd_out); // save embeddings to chunks for (int i = 0; i < n_chunks; i++) { - chunks[i].embedding = std::vector(emb + i * n_embd, emb + (i + 1) * n_embd); + chunks[i].embedding = std::vector(emb + i * n_embd_out, emb + (i + 1) * n_embd_out); // clear tokens as they are no longer needed chunks[i].tokens.clear(); } @@ -266,8 +266,8 @@ int main(int argc, char ** argv) { batch_add_seq(query_batch, query_tokens, 0); - std::vector query_emb(n_embd, 0); - batch_process(ctx, query_batch, query_emb.data(), 1, n_embd); + std::vector query_emb(n_embd_out, 0); + batch_process(ctx, query_batch, query_emb.data(), 1, n_embd_out); common_batch_clear(query_batch); @@ -275,7 +275,7 @@ int main(int argc, char ** argv) { { std::vector> similarities; for (int i = 0; i < n_chunks; i++) { - float sim = common_embd_similarity_cos(chunks[i].embedding.data(), query_emb.data(), n_embd); + float sim = common_embd_similarity_cos(chunks[i].embedding.data(), query_emb.data(), n_embd_out); similarities.push_back(std::make_pair(i, sim)); } diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 55f2f46086d..995b774c207 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -1065,6 +1065,7 @@ struct ggml_cuda_graph { int number_consecutive_updates = 0; bool cuda_graphs_enabled = false; std::vector ggml_graph_properties; + std::vector extraneous_srcs_properties; #endif }; diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index 09c19429ad8..31446787287 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -11,10 +11,12 @@ #define SOFTMAX_FTZ_THRESHOLD -20.0f // Softmax exp. of values smaller than this are flushed to zero to avoid NaNs. // log(2) = 0.6931, by adding this to the KQ maximum used for the softmax the numerical range representable -// by the VKQ accumulators is effectively being shifted up by a factor of 8. +// by the VKQ accumulators is effectively being shifted up by a factor of 2. // This reduces issues with numerical overflow but also causes larger values to be flushed to zero. // However, as the output from FlashAttention will usually be used as an input for a matrix multiplication this should be negligible. -#define FATTN_KQ_MAX_OFFSET 0.6931f +// Still, the value range should be shifted as much as necessary but as little as possible. +// The macro on the following line shifts it by a factor of 2**3=8, as was needed to fix https://github.com/ggml-org/llama.cpp/issues/18606 . +#define FATTN_KQ_MAX_OFFSET (3.0f*0.6931f) typedef void (* fattn_kernel_t)( const char * __restrict__ Q, diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 1bbca225d2b..75269170c34 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2973,15 +2973,16 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx, } // Check if the graph size has changed - if (cuda_ctx->cuda_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) { + if (cuda_ctx->cuda_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes + cgraph->n_leafs) { cuda_graph_update_required = true; - cuda_ctx->cuda_graph->ggml_graph_properties.resize(cgraph->n_nodes); + cuda_ctx->cuda_graph->ggml_graph_properties.resize(cgraph->n_nodes + cgraph->n_leafs); } // Loop over nodes in GGML graph to determine if CUDA graph update is required // and store properties to allow this comparison for the next token for (int i = 0; i < cgraph->n_nodes; i++) { bool has_matching_properties = true; + if (!cuda_graph_update_required) { has_matching_properties = ggml_graph_node_has_matching_properties(cgraph->nodes[i], &cuda_ctx->cuda_graph->ggml_graph_properties[i]); } @@ -2991,6 +2992,17 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx, set_ggml_graph_node_properties(cgraph->nodes[i], &cuda_ctx->cuda_graph->ggml_graph_properties[i]); } + for (int i = 0; i < cgraph->n_leafs; i++) { + bool has_matching_properties = true; + if (!cuda_graph_update_required) { + has_matching_properties = ggml_graph_node_has_matching_properties(cgraph->leafs[i], &cuda_ctx->cuda_graph->ggml_graph_properties[cgraph->n_nodes + i]); + } + if (!has_matching_properties) { + cuda_graph_update_required = true; + } + set_ggml_graph_node_properties(cgraph->leafs[i], &cuda_ctx->cuda_graph->ggml_graph_properties[cgraph->n_nodes + i]); + } + return cuda_graph_update_required; } diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index d0e99b6fe29..c7afdfb8e92 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2273,6 +2273,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_xielu_inplace_f32, "xielu_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_XIELU][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_xielu_inplace_f16, "xielu_inplace_f16", constants); + + // CEIL + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_f32, "ceil_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_f16, "ceil_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f32, "ceil_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f16, "ceil_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2528,6 +2538,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_XIELU: + case GGML_UNARY_OP_CEIL: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && (src0->type == op->type); break; diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index d474ab107b4..25fe2854518 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -16,7 +16,8 @@ "HARDSWISH_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * min(1.0, max(0.0, (src[src_i] + 3.0) / 6.0));", "GELU_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(sqrt(2.0 / 3.14159265) * (src[src_i] + 0.044715 * pow(src[src_i], 3.0)), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "GELU_QUICK_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * 0.5 * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", - "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458" + "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", + "CEIL_FUNC": "{{MUTATE}}[dst_i] = ceil(src[src_i]);" } #end(REPL_TEMPLATES) @@ -357,6 +358,27 @@ "SHADER_NAME": "gelu_erf_inplace_f16", "REPLS": { "TYPE": "f16", "FUNC": "GELU_ERF_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] + }, + + { + "SHADER_NAME": "ceil_f32", + "REPLS": { "TYPE": "f32", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "ceil_f16", + "REPLS": { "TYPE": "f16", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "ceil_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "ceil_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] } ] diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index c8feca5679b..64c227799f4 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -104,6 +104,7 @@ class LLM: VOCAB_SIZE = "{arch}.vocab_size" CONTEXT_LENGTH = "{arch}.context_length" EMBEDDING_LENGTH = "{arch}.embedding_length" + EMBEDDING_LENGTH_OUT = "{arch}.embedding_length_out" FEATURES_LENGTH = "{arch}.features_length" BLOCK_COUNT = "{arch}.block_count" LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count" @@ -3038,6 +3039,7 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.ATTN_V, MODEL_TENSOR.ATTN_OUT, MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.DENSE_2_OUT, # LFM2-ColBert-350M ], MODEL_ARCH.LFM2MOE: [ MODEL_TENSOR.TOKEN_EMBD, diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 612a978e4c3..a7506aa7934 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -681,6 +681,9 @@ def add_context_length(self, length: int) -> None: def add_embedding_length(self, length: int) -> None: self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length) + def add_embedding_length_out(self, length: int) -> None: + self.add_uint32(Keys.LLM.EMBEDDING_LENGTH_OUT.format(arch=self.arch), length) + def add_features_length(self, length: int) -> None: self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length) diff --git a/include/llama.h b/include/llama.h index bf4ce5f927f..05cb6532542 100644 --- a/include/llama.h +++ b/include/llama.h @@ -535,6 +535,7 @@ extern "C" { LLAMA_API int32_t llama_model_n_ctx_train(const struct llama_model * model); LLAMA_API int32_t llama_model_n_embd (const struct llama_model * model); LLAMA_API int32_t llama_model_n_embd_inp (const struct llama_model * model); + LLAMA_API int32_t llama_model_n_embd_out (const struct llama_model * model); LLAMA_API int32_t llama_model_n_layer (const struct llama_model * model); LLAMA_API int32_t llama_model_n_head (const struct llama_model * model); LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model); diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 93fed1a9a3c..2ead965469a 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -152,6 +152,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_VOCAB_SIZE, "%s.vocab_size" }, { LLM_KV_CONTEXT_LENGTH, "%s.context_length" }, { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" }, + { LLM_KV_EMBEDDING_LENGTH_OUT, "%s.embedding_length_out" }, { LLM_KV_FEATURES_LENGTH, "%s.features_length" }, { LLM_KV_BLOCK_COUNT, "%s.block_count" }, { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" }, @@ -2075,6 +2076,7 @@ static std::set llm_get_tensor_names(llm_arch arch) { LLM_TENSOR_TOKEN_EMBD, LLM_TENSOR_OUTPUT_NORM_LFM2, LLM_TENSOR_OUTPUT, + LLM_TENSOR_DENSE_2_OUT, }; case LLM_ARCH_LFM2MOE: return { diff --git a/src/llama-arch.h b/src/llama-arch.h index 57e470a9f38..68ec6a18b18 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -156,6 +156,7 @@ enum llm_kv { LLM_KV_VOCAB_SIZE, LLM_KV_CONTEXT_LENGTH, LLM_KV_EMBEDDING_LENGTH, + LLM_KV_EMBEDDING_LENGTH_OUT, LLM_KV_FEATURES_LENGTH, LLM_KV_BLOCK_COUNT, LLM_KV_LEADING_DENSE_BLOCK_COUNT, diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 9c2e1c17a3a..f220010a1b4 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -758,7 +758,8 @@ float * llama_context::get_embeddings_ith(int32_t i) { throw std::runtime_error(format("corrupt output buffer (j=%" PRId64 ", n_outputs=%d)", j, n_outputs)); } - return embd + j*model.hparams.n_embd; + const uint32_t n_embd_out = model.hparams.get_n_embd_out(); + return embd + j*n_embd_out; } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what()); #ifndef NDEBUG @@ -1194,9 +1195,10 @@ int llama_context::encode(const llama_batch & batch_inp) { { // extract token embeddings GGML_ASSERT(embd != nullptr); + const uint32_t n_embd_out = hparams.get_n_embd_out(); - GGML_ASSERT(n_tokens*n_embd <= (int64_t) embd_size); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd, 0, n_tokens*n_embd*sizeof(float)); + GGML_ASSERT(n_tokens*n_embd_out <= (int64_t) embd_size); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd, 0, n_tokens*n_embd_out*sizeof(float)); } break; case LLAMA_POOLING_TYPE_MEAN: case LLAMA_POOLING_TYPE_CLS: @@ -1600,12 +1602,13 @@ int llama_context::decode(const llama_batch & batch_inp) { { // extract token embeddings GGML_ASSERT(embd != nullptr); - float * embd_out = embd + n_outputs_prev*n_embd; + const uint32_t n_embd_out = hparams.get_n_embd_out(); + float * embd_out = embd + n_outputs_prev*n_embd_out; if (n_outputs) { GGML_ASSERT( n_outputs_prev + n_outputs <= n_outputs_all); - GGML_ASSERT((n_outputs_prev + n_outputs)*n_embd <= (int64_t) embd_size); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd_out, 0, n_outputs*n_embd*sizeof(float)); + GGML_ASSERT((n_outputs_prev + n_outputs)*n_embd_out <= (int64_t) embd_size); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd_out, 0, n_outputs*n_embd_out*sizeof(float)); } } break; case LLAMA_POOLING_TYPE_MEAN: @@ -1730,9 +1733,9 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba const int64_t n_outputs_max = std::max(n_outputs, n_seq_max()); - const auto n_batch = cparams.n_batch; - const auto n_vocab = vocab.n_tokens(); - const auto n_embd = hparams.n_embd; + const auto n_batch = cparams.n_batch; + const auto n_vocab = vocab.n_tokens(); + const auto n_embd_out = hparams.get_n_embd_out(); bool has_logits = true; bool has_embd = cparams.embeddings; @@ -1773,7 +1776,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba // Allocate CPU logits buffer only if needed by sequences in this batch logits_size = (has_logits && cpu_logits) ? n_vocab*n_outputs_max : 0; - embd_size = has_embd ? n_embd*n_outputs_max : 0; + embd_size = has_embd ? n_embd_out*n_outputs_max : 0; // TODO: avoid this branching by working with the worst-case if (!has_sampling) { diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 86c54726383..374ff1ebf3a 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -2071,14 +2071,18 @@ llm_graph_input_mem_hybrid * llm_graph_context::build_inp_mem_hybrid() const { void llm_graph_context::build_dense_out( ggml_tensor * dense_2, ggml_tensor * dense_3) const { - if (!cparams.embeddings || dense_2 == nullptr || dense_3 == nullptr) { + if (!cparams.embeddings || !(dense_2 || dense_3)) { return; } ggml_tensor * cur = res->t_embd_pooled != nullptr ? res->t_embd_pooled : res->t_embd; GGML_ASSERT(cur != nullptr && "missing t_embd_pooled/t_embd"); - cur = ggml_mul_mat(ctx0, dense_2, cur); - cur = ggml_mul_mat(ctx0, dense_3, cur); + if (dense_2) { + cur = ggml_mul_mat(ctx0, dense_2, cur); + } + if (dense_3) { + cur = ggml_mul_mat(ctx0, dense_3, cur); + } cb(cur, "result_embd_pooled", -1); res->t_embd_pooled = cur; ggml_build_forward_expand(gf, cur); diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp index fe1fa4341d4..c847ef91b7a 100644 --- a/src/llama-hparams.cpp +++ b/src/llama-hparams.cpp @@ -72,6 +72,10 @@ uint32_t llama_hparams::n_embd_inp() const { return n_embd_inp; } +uint32_t llama_hparams::get_n_embd_out() const { + return n_embd_out > 0 ? n_embd_out : n_embd; +} + uint32_t llama_hparams::n_embd_k_gqa(uint32_t il) const { const uint32_t n_head_kv = this->n_head_kv(il); diff --git a/src/llama-hparams.h b/src/llama-hparams.h index fc5708fc4b0..7ae3ec292ef 100644 --- a/src/llama-hparams.h +++ b/src/llama-hparams.h @@ -162,6 +162,9 @@ struct llama_hparams { // for Classifiers uint32_t n_cls_out = 1; + // output embedding dimension (0 = use n_embd) + uint32_t n_embd_out = 0; + // llama4 smallthinker uint32_t n_moe_layer_step = 0; uint32_t n_no_rope_layer_step = 4; @@ -234,6 +237,9 @@ struct llama_hparams { // dimension of main + auxiliary input embeddings uint32_t n_embd_inp() const; + // dimension of output embeddings + uint32_t get_n_embd_out() const; + // dimension of key embeddings across all k-v heads uint32_t n_embd_k_gqa(uint32_t il = 0) const; diff --git a/src/llama-model-saver.cpp b/src/llama-model-saver.cpp index 563823dc35d..ae27c71ce23 100644 --- a/src/llama-model-saver.cpp +++ b/src/llama-model-saver.cpp @@ -146,6 +146,9 @@ void llama_model_saver::add_kv_from_model() { add_kv(LLM_KV_VOCAB_SIZE, vocab.n_tokens()); add_kv(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); add_kv(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); + if (hparams.n_embd_out > 0) { + add_kv(LLM_KV_EMBEDDING_LENGTH_OUT, hparams.n_embd_out); + } add_kv(LLM_KV_BLOCK_COUNT, hparams.n_layer); add_kv(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead); add_kv(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, true); diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 28dcc2840f0..04c48b5fd3f 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -507,6 +507,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); + ml.get_key(LLM_KV_EMBEDDING_LENGTH_OUT, hparams.n_embd_out, false); ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer); ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false); ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); @@ -6469,6 +6470,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.shortconv.out_proj = create_tensor(tn(LLM_TENSOR_SHORTCONV_OUTPROJ, "weight", i), {n_embd, n_embd}, 0); } } + + // for LFM2-ColBert-350M + dense_2_out_layers = create_tensor(tn(LLM_TENSOR_DENSE_2_OUT, "weight"), {n_embd, hparams.get_n_embd_out()}, TENSOR_NOT_REQUIRED); } break; case LLM_ARCH_SMALLTHINKER: { @@ -8003,6 +8007,10 @@ int32_t llama_model_n_embd_inp(const llama_model * model) { return model->hparams.n_embd_inp(); } +int32_t llama_model_n_embd_out(const llama_model * model) { + return model->hparams.get_n_embd_out(); +} + int32_t llama_model_n_layer(const llama_model * model) { return model->hparams.n_layer; } diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 5a6223b29cb..33635a15866 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1505,9 +1505,9 @@ struct server_context_impl { res->n_tokens = slot.task->n_tokens(); res->res_type = slot.task->params.res_type; - const int n_embd = llama_model_n_embd(model); + const int n_embd_out = llama_model_n_embd_out(model); - std::vector embd_res(n_embd, 0.0f); + std::vector embd_res(n_embd_out, 0.0f); for (int i = 0; i < batch.n_tokens; ++i) { if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) { @@ -1524,18 +1524,18 @@ struct server_context_impl { if (embd == nullptr) { SLT_ERR(slot, "failed to get embeddings, token = %d, seq_id = %d\n", batch.token[i], batch.seq_id[i][0]); - res->embedding.push_back(std::vector(n_embd, 0.0f)); + res->embedding.push_back(std::vector(n_embd_out, 0.0f)); continue; } // normalize only when there is pooling if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) { - common_embd_normalize(embd, embd_res.data(), n_embd, slot.task->params.embd_normalize); + common_embd_normalize(embd, embd_res.data(), n_embd_out, slot.task->params.embd_normalize); res->embedding.push_back(embd_res); break; } - res->embedding.emplace_back(embd, embd + n_embd); + res->embedding.emplace_back(embd, embd + n_embd_out); } SLT_DBG(slot, "%s", "sending embeddings\n");