From 798d4fc548fb8a4a232b368f690fc0c4aa7f0940 Mon Sep 17 00:00:00 2001 From: thisisayushg <94356464+thisisayushg@users.noreply.github.com> Date: Sat, 14 Mar 2026 09:26:30 +0530 Subject: [PATCH] Clear kv cache and reset tokens after chat completion --- llama_cpp/llama_chat_format.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index f738ab9bb..f0fd0ddfd 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -696,6 +696,8 @@ def chat_completion_handler( return _convert_completion_to_chat_function( tool_name, completion_or_chunks, stream ) + llama.reset() + llama._ctx.kv_cache_clear() return _convert_completion_to_chat(completion_or_chunks, stream=stream) return chat_completion_handler