-- Post-patch form of estimate_token_count from lua/chatgpt_nvim/init.lua
-- (patch subject: "feat: change token calculation").

--- Estimate the number of tokens in a piece of text.
-- A token is counted as either a maximal run of alphanumeric characters
-- (`%w+`) or a maximal run of punctuation characters (`%p+`). Whitespace
-- separates tokens and is never counted. Bytes that are neither
-- alphanumeric, punctuation nor whitespace according to the current C
-- locale are not counted on their own, but they do split the runs around
-- them.
-- @tparam string text Text to measure.
-- @treturn number Estimated token count (0 for an empty string).
local function estimate_token_count(text)
  local token_count = 0
  -- Lua patterns have no alternation operator: in "(%w+|%p+)" the "|" is a
  -- literal character, so that pattern matches almost nothing. Count each
  -- character class in its own pass instead. Neither class includes
  -- whitespace, so a run can never span two whitespace-separated chunks and
  -- no explicit pre-splitting on "%S+" is required.
  for _, pattern in ipairs({ "%w+", "%p+" }) do
    for _ in text:gmatch(pattern) do
      token_count = token_count + 1
    end
  end
  return token_count
end