feat: change token calculation
This commit is contained in:
parent
07eceb4bee
commit
1deae056b7
1 changed file with 10 additions and 2 deletions
|
|
@@ -179,8 +179,16 @@ end
|
|||
|
||||
--- Estimate the number of tokens in a piece of text.
-- Heuristic: every maximal run of alphanumeric characters (%w+) and every
-- maximal run of punctuation characters (%p+) counts as one token.
-- Whitespace only separates runs; characters that are neither %w nor %p
-- contribute nothing to the count.
-- @tparam string text Text to measure.
-- @treturn number Estimated token count (0 for an empty string).
local function estimate_token_count(text)
  -- Lua patterns have no alternation operator, so "(%w+|%p+)" would only
  -- match an alphanumeric run followed by a literal "|" and a punctuation
  -- run. Count the two character classes in separate passes instead; no
  -- character belongs to both classes, so no run is counted twice.
  -- string.gsub's second return value is the number of matches made.
  local word_runs = select(2, text:gsub("%w+", ""))
  local punct_runs = select(2, text:gsub("%p+", ""))
  return word_runs + punct_runs
end
|
||||
|
||||
local function handle_step_by_step_if_needed(prompt, conf)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue