From 798d4fc548fb8a4a232b368f690fc0c4aa7f0940 Mon Sep 17 00:00:00 2001
From: thisisayushg <94356464+thisisayushg@users.noreply.github.com>
Date: Sat, 14 Mar 2026 09:26:30 +0530
Subject: [PATCH] Clear kv cache and reset tokens after chat completion

---
 llama_cpp/llama_chat_format.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index f738ab9bb..f0fd0ddfd 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -696,6 +696,8 @@ def chat_completion_handler(
             return _convert_completion_to_chat_function(
                 tool_name, completion_or_chunks, stream
             )
+        llama.reset()
+        llama._ctx.kv_cache_clear()
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)
 
     return chat_completion_handler