feat: change max token handling
This commit is contained in:
@@ -373,7 +373,8 @@ func handleMultipartMessage(reader io.Reader, boundary string) string {
|
||||
}
|
||||
|
||||
mReader := multipart.NewReader(reader, boundary)
|
||||
var textContent string
|
||||
var textPlainContent string
|
||||
var textHTMLContent string
|
||||
partIndex := 0
|
||||
|
||||
for {
|
||||
@@ -387,39 +388,75 @@ func handleMultipartMessage(reader io.Reader, boundary string) string {
|
||||
}
|
||||
|
||||
contentType := part.Header.Get("Content-Type")
|
||||
contentDisposition := part.Header.Get("Content-Disposition")
|
||||
contentTransferEncoding := strings.ToLower(part.Header.Get("Content-Transfer-Encoding"))
|
||||
|
||||
logger.WithFields(logrus.Fields{
|
||||
"partIndex": partIndex,
|
||||
"partContentType": contentType,
|
||||
"partDisposition": contentDisposition,
|
||||
"partTransferEncoding": contentTransferEncoding,
|
||||
"partHeaders": part.Header,
|
||||
}).Debug("handleMultipartMessage: Processing part")
|
||||
|
||||
if strings.HasPrefix(contentType, "text/plain") {
|
||||
var partReader io.Reader = part
|
||||
if contentTransferEncoding == "quoted-printable" {
|
||||
partReader = quotedprintable.NewReader(part)
|
||||
}
|
||||
// Add handling for "base64" if needed in the future
|
||||
// else if contentTransferEncoding == "base64" {
|
||||
// partReader = base64.NewDecoder(base64.StdEncoding, part)
|
||||
// }
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
if _, err := buf.ReadFrom(partReader); err != nil {
|
||||
logger.WithError(err).WithField("partIndex", partIndex).Debug("Failed to read from partReader in multipart")
|
||||
continue // Or handle error more robustly
|
||||
}
|
||||
textContent = buf.String()
|
||||
// Assuming we only care about the first text/plain part found
|
||||
// If multiple text/plain parts could exist and need concatenation, this logic would need adjustment.
|
||||
break
|
||||
// Skip attachments
|
||||
if strings.HasPrefix(contentDisposition, "attachment") {
|
||||
logger.WithFields(logrus.Fields{
|
||||
"partIndex": partIndex,
|
||||
"filename": part.FileName(),
|
||||
}).Debug("Skipping attachment part")
|
||||
partIndex++
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip non-text content types (images, videos, applications, etc.)
|
||||
if !strings.HasPrefix(contentType, "text/plain") &&
|
||||
!strings.HasPrefix(contentType, "text/html") {
|
||||
logger.WithFields(logrus.Fields{
|
||||
"partIndex": partIndex,
|
||||
"contentType": contentType,
|
||||
}).Debug("Skipping non-text content type")
|
||||
partIndex++
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle quoted-printable encoding
|
||||
var partReader io.Reader = part
|
||||
if contentTransferEncoding == "quoted-printable" {
|
||||
partReader = quotedprintable.NewReader(part)
|
||||
}
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
if _, err := buf.ReadFrom(partReader); err != nil {
|
||||
logger.WithError(err).WithField("partIndex", partIndex).Debug("Failed to read from partReader in multipart")
|
||||
partIndex++
|
||||
continue
|
||||
}
|
||||
|
||||
// Store text/plain and text/html separately
|
||||
if strings.HasPrefix(contentType, "text/plain") {
|
||||
textPlainContent = buf.String()
|
||||
logger.WithField("textPlainLength", len(textPlainContent)).Debug("Found text/plain part")
|
||||
} else if strings.HasPrefix(contentType, "text/html") {
|
||||
textHTMLContent = buf.String()
|
||||
logger.WithField("textHTMLLength", len(textHTMLContent)).Debug("Found text/html part")
|
||||
}
|
||||
|
||||
partIndex++
|
||||
}
|
||||
|
||||
logger.WithField("textContentResult", textContent).Debug("handleMultipartMessage: Returning textContent")
|
||||
return textContent
|
||||
// Prefer text/plain over text/html
|
||||
if textPlainContent != "" {
|
||||
logger.Debug("handleMultipartMessage: Returning text/plain content")
|
||||
return textPlainContent
|
||||
}
|
||||
|
||||
if textHTMLContent != "" {
|
||||
logger.Debug("handleMultipartMessage: Converting text/html to plain text")
|
||||
return htmlToPlainText(textHTMLContent)
|
||||
}
|
||||
|
||||
logger.Debug("handleMultipartMessage: No text content found")
|
||||
return ""
|
||||
}
|
||||
|
||||
func handleSinglePartMessage(reader io.Reader) string {
|
||||
@@ -434,6 +471,48 @@ func handleSinglePartMessage(reader io.Reader) string {
|
||||
return content
|
||||
}
|
||||
|
||||
// htmlToPlainText converts HTML content to plain text by extracting text nodes
|
||||
func htmlToPlainText(htmlContent string) string {
|
||||
logger.WithField("htmlLength", len(htmlContent)).Debug("Converting HTML to plain text")
|
||||
|
||||
// Simple HTML tag stripping - removes all HTML tags and extracts text
|
||||
var result strings.Builder
|
||||
inTag := false
|
||||
|
||||
for _, char := range htmlContent {
|
||||
switch char {
|
||||
case '<':
|
||||
inTag = true
|
||||
case '>':
|
||||
inTag = false
|
||||
default:
|
||||
if !inTag {
|
||||
result.WriteRune(char)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plainText := result.String()
|
||||
|
||||
// Clean up excessive whitespace
|
||||
plainText = strings.ReplaceAll(plainText, "\r\n", "\n")
|
||||
plainText = strings.ReplaceAll(plainText, "\r", "\n")
|
||||
|
||||
// Replace multiple spaces with single space
|
||||
lines := strings.Split(plainText, "\n")
|
||||
var cleanLines []string
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed != "" {
|
||||
cleanLines = append(cleanLines, trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
result2 := strings.Join(cleanLines, "\n")
|
||||
logger.WithField("plainTextLength", len(result2)).Debug("HTML converted to plain text")
|
||||
return result2
|
||||
}
|
||||
|
||||
func cleanMessageContent(content string, performHeaderStripping bool) string {
|
||||
logger.WithField("inputContentLength", len(content)).Debug("cleanMessageContent: Starting")
|
||||
logger.WithField("performHeaderStripping", performHeaderStripping).Debug("cleanMessageContent: performHeaderStripping flag")
|
||||
|
||||
Reference in New Issue
Block a user