diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index f738ab9bb..f0fd0ddfd 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -696,6 +696,8 @@ def chat_completion_handler(
             return _convert_completion_to_chat_function(
                 tool_name, completion_or_chunks, stream
             )
+        llama.reset()
+        llama._ctx.kv_cache_clear()
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)

     return chat_completion_handler