From 0ec3c4a95763be1b675317f4cac5233adc8e0b7a Mon Sep 17 00:00:00 2001
From: Victor Biederbeck <victor@moria.hiddencove.xyz>
Date: Sat, 14 Mar 2026 11:17:46 -0700
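Subject: [PATCH] feat: expose attention_type parameter in Llama.__init__

Add an `attention_type` keyword argument to `Llama.__init__`, defaulting to
`llama_cpp.LLAMA_ATTENTION_TYPE_UNSPECIFIED`, and copy it into
`context_params.attention_type` alongside the existing `pooling_type`
assignment.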
---
 llama_cpp/llama.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 71d94ebd8..6f6184afb 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -81,6 +81,7 @@ def __init__(
             int
         ] = llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
         pooling_type: int = llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
+        attention_type: int = llama_cpp.LLAMA_ATTENTION_TYPE_UNSPECIFIED,
         rope_freq_base: float = 0.0,
         rope_freq_scale: float = 0.0,
         yarn_ext_factor: float = -1.0,
@@ -319,6 +320,7 @@ def __init__(
             else llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
         )
         self.context_params.pooling_type = pooling_type
+        self.context_params.attention_type = attention_type
         self.context_params.rope_freq_base = (
             rope_freq_base if rope_freq_base != 0.0 else 0
         )