|
|
|
|
@@ -6,6 +6,7 @@ import (
|
|
|
|
|
"io"
|
|
|
|
|
"mime"
|
|
|
|
|
"mime/multipart"
|
|
|
|
|
"mime/quotedprintable"
|
|
|
|
|
"net/mail"
|
|
|
|
|
"paraclub-ai-mailer/config"
|
|
|
|
|
"paraclub-ai-mailer/internal/logger"
|
|
|
|
|
@@ -73,6 +74,7 @@ type Email struct {
|
|
|
|
|
ID string
|
|
|
|
|
Subject string
|
|
|
|
|
From string
|
|
|
|
|
Date time.Time
|
|
|
|
|
Body string
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -194,6 +196,7 @@ func (ic *IMAPClient) FetchUnprocessedEmails() ([]Email, error) {
|
|
|
|
|
ID: msg.Envelope.MessageId,
|
|
|
|
|
Subject: msg.Envelope.Subject,
|
|
|
|
|
From: from,
|
|
|
|
|
Date: msg.Envelope.Date,
|
|
|
|
|
Body: bodyBuilder.String(),
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
@@ -259,7 +262,7 @@ func (ic *IMAPClient) SaveDraft(email Email, response string) (err error) {
|
|
|
|
|
response,
|
|
|
|
|
email.From,
|
|
|
|
|
email.Subject,
|
|
|
|
|
time.Now().Format("Mon, 02 Jan 2006 15:04:05 -0700"),
|
|
|
|
|
email.Date.Format("Mon, 02 Jan 2006 15:04:05 -0700"),
|
|
|
|
|
extractMessageContent(email.Body))
|
|
|
|
|
|
|
|
|
|
literal := &MessageLiteral{
|
|
|
|
|
@@ -285,17 +288,20 @@ func (ic *IMAPClient) SaveDraft(email Email, response string) (err error) {
|
|
|
|
|
// by removing email headers and MIME boundaries
|
|
|
|
|
func extractMessageContent(body string) string {
|
|
|
|
|
logger.WithField("bodyLength", len(body)).Debug("Starting message content extraction")
|
|
|
|
|
logger.WithField("rawInputBody", body).Debug("extractMessageContent: Raw input body")
|
|
|
|
|
msg, err := mail.ReadMessage(strings.NewReader(body))
|
|
|
|
|
if err != nil {
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"error": err,
|
|
|
|
|
"bodyLength": len(body),
|
|
|
|
|
}).Debug("Failed to parse email message, falling back to simple extraction")
|
|
|
|
|
return fallbackExtractContent(body)
|
|
|
|
|
// When ReadMessage fails, the body is raw, so header stripping is needed.
|
|
|
|
|
return cleanMessageContent(fallbackExtractContent(body), true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
contentTypeHeader := msg.Header.Get("Content-Type")
|
|
|
|
|
logger.WithField("contentTypeHeader", contentTypeHeader).Debug("Got Content-Type header")
|
|
|
|
|
logger.WithField("parsedContentTypeHeader", contentTypeHeader).Debug("extractMessageContent: Parsed Content-Type header")
|
|
|
|
|
|
|
|
|
|
mediaType, params, err := mime.ParseMediaType(contentTypeHeader)
|
|
|
|
|
if err != nil {
|
|
|
|
|
@@ -303,34 +309,61 @@ func extractMessageContent(body string) string {
|
|
|
|
|
"error": err,
|
|
|
|
|
"contentTypeHeader": contentTypeHeader,
|
|
|
|
|
}).Debug("Failed to parse Content-Type header, falling back to simple extraction")
|
|
|
|
|
return fallbackExtractContent(body)
|
|
|
|
|
// When ParseMediaType fails, the body is raw, so header stripping is needed.
|
|
|
|
|
return cleanMessageContent(fallbackExtractContent(body), true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"mediaType": mediaType,
|
|
|
|
|
"params": params,
|
|
|
|
|
}).Debug("Parsed message Content-Type")
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"mediaType": mediaType,
|
|
|
|
|
"params": params,
|
|
|
|
|
}).Debug("extractMessageContent: Parsed mediaType and params")
|
|
|
|
|
|
|
|
|
|
var content string
|
|
|
|
|
if strings.HasPrefix(mediaType, "multipart/") {
|
|
|
|
|
// For multipart, the handling of Content-Transfer-Encoding will be done within handleMultipartMessage for each part
|
|
|
|
|
logger.Debug("extractMessageContent: Handling as multipart message")
|
|
|
|
|
content = handleMultipartMessage(msg.Body, params["boundary"])
|
|
|
|
|
logger.WithField("contentFromMultipart", content).Debug("extractMessageContent: Content after handleMultipartMessage")
|
|
|
|
|
} else {
|
|
|
|
|
content = handleSinglePartMessage(msg.Body)
|
|
|
|
|
// For single part, handle Content-Transfer-Encoding here
|
|
|
|
|
var partReader io.Reader = msg.Body
|
|
|
|
|
transferEncoding := strings.ToLower(msg.Header.Get("Content-Transfer-Encoding"))
|
|
|
|
|
if transferEncoding == "quoted-printable" {
|
|
|
|
|
partReader = quotedprintable.NewReader(msg.Body)
|
|
|
|
|
}
|
|
|
|
|
// Add handling for "base64" if needed in the future
|
|
|
|
|
// else if transferEncoding == "base64" {
|
|
|
|
|
// partReader = base64.NewDecoder(base64.StdEncoding, msg.Body)
|
|
|
|
|
// }
|
|
|
|
|
logger.Debug("extractMessageContent: Handling as single part message")
|
|
|
|
|
content = handleSinglePartMessage(partReader)
|
|
|
|
|
logger.WithField("contentFromSinglePart", content).Debug("extractMessageContent: Content after handleSinglePartMessage")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if content == "" {
|
|
|
|
|
logger.Debug("No content extracted, falling back to simple extraction")
|
|
|
|
|
return fallbackExtractContent(body)
|
|
|
|
|
logger.Debug("extractMessageContent: No content from primary extraction, falling back.")
|
|
|
|
|
// When primary extraction yields no content, the body is raw, so header stripping is needed.
|
|
|
|
|
return cleanMessageContent(fallbackExtractContent(body), true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Clean up the content
|
|
|
|
|
content = cleanMessageContent(content)
|
|
|
|
|
logger.WithField("contentBeforeClean", content).Debug("extractMessageContent: Content before cleanMessageContent")
|
|
|
|
|
// When ReadMessage succeeds, 'content' is already the message body (or part body),
|
|
|
|
|
// so no further header stripping should be done.
|
|
|
|
|
content = cleanMessageContent(content, false)
|
|
|
|
|
logger.WithField("contentAfterClean", content).Debug("extractMessageContent: Content after cleanMessageContent")
|
|
|
|
|
|
|
|
|
|
logger.WithField("contentLength", len(content)).Debug("Successfully extracted and cleaned message content")
|
|
|
|
|
return content
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func handleMultipartMessage(reader io.Reader, boundary string) string {
|
|
|
|
|
logger.WithField("boundary", boundary).Debug("handleMultipartMessage: Starting with boundary")
|
|
|
|
|
if boundary == "" {
|
|
|
|
|
logger.Debug("No boundary found in multipart message")
|
|
|
|
|
return ""
|
|
|
|
|
@@ -351,89 +384,128 @@ func handleMultipartMessage(reader io.Reader, boundary string) string {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
contentType := part.Header.Get("Content-Type")
|
|
|
|
|
contentTransferEncoding := strings.ToLower(part.Header.Get("Content-Transfer-Encoding"))
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"partIndex": partIndex,
|
|
|
|
|
"partContentType": contentType,
|
|
|
|
|
"partTransferEncoding": contentTransferEncoding,
|
|
|
|
|
"partHeaders": part.Header,
|
|
|
|
|
}).Debug("handleMultipartMessage: Processing part")
|
|
|
|
|
|
|
|
|
|
if strings.HasPrefix(contentType, "text/plain") {
|
|
|
|
|
var partReader io.Reader = part
|
|
|
|
|
if contentTransferEncoding == "quoted-printable" {
|
|
|
|
|
partReader = quotedprintable.NewReader(part)
|
|
|
|
|
}
|
|
|
|
|
// Add handling for "base64" if needed in the future
|
|
|
|
|
// else if contentTransferEncoding == "base64" {
|
|
|
|
|
// partReader = base64.NewDecoder(base64.StdEncoding, part)
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
buf := new(bytes.Buffer)
|
|
|
|
|
if _, err := buf.ReadFrom(part); err != nil {
|
|
|
|
|
continue
|
|
|
|
|
if _, err := buf.ReadFrom(partReader); err != nil {
|
|
|
|
|
logger.WithError(err).WithField("partIndex", partIndex).Debug("Failed to read from partReader in multipart")
|
|
|
|
|
continue // Or handle error more robustly
|
|
|
|
|
}
|
|
|
|
|
textContent = buf.String()
|
|
|
|
|
// Assuming we only care about the first text/plain part found
|
|
|
|
|
// If multiple text/plain parts could exist and need concatenation, this logic would need adjustment.
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
partIndex++
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
logger.WithField("textContentResult", textContent).Debug("handleMultipartMessage: Returning textContent")
|
|
|
|
|
return textContent
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func handleSinglePartMessage(reader io.Reader) string {
|
|
|
|
|
logger.Debug("handleSinglePartMessage: Starting")
|
|
|
|
|
buf := new(bytes.Buffer)
|
|
|
|
|
if _, err := buf.ReadFrom(reader); err != nil {
|
|
|
|
|
logger.WithError(err).Debug("Failed to read message body")
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
return buf.String()
|
|
|
|
|
content := buf.String()
|
|
|
|
|
logger.WithField("readContent", content).Debug("handleSinglePartMessage: Content read from reader")
|
|
|
|
|
return content
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func cleanMessageContent(content string) string {
|
|
|
|
|
// Remove any remaining email headers that might be in the body
|
|
|
|
|
lines := strings.Split(content, "\n")
|
|
|
|
|
var cleanLines []string
|
|
|
|
|
headerSection := true
|
|
|
|
|
func cleanMessageContent(content string, performHeaderStripping bool) string {
|
|
|
|
|
logger.WithField("inputContentLength", len(content)).Debug("cleanMessageContent: Starting")
|
|
|
|
|
logger.WithField("performHeaderStripping", performHeaderStripping).Debug("cleanMessageContent: performHeaderStripping flag")
|
|
|
|
|
|
|
|
|
|
for _, line := range lines {
|
|
|
|
|
trimmed := strings.TrimSpace(line)
|
|
|
|
|
if performHeaderStripping {
|
|
|
|
|
// Remove any remaining email headers that might be in the body
|
|
|
|
|
lines := strings.Split(content, "\n")
|
|
|
|
|
var cleanLines []string
|
|
|
|
|
headerSection := true
|
|
|
|
|
|
|
|
|
|
// Empty line marks the end of headers
|
|
|
|
|
if headerSection && trimmed == "" {
|
|
|
|
|
headerSection = false
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, line := range lines {
|
|
|
|
|
trimmed := strings.TrimSpace(line)
|
|
|
|
|
|
|
|
|
|
// Skip header lines
|
|
|
|
|
if headerSection && (strings.Contains(trimmed, ":") || trimmed == "") {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
// Empty line marks the end of headers
|
|
|
|
|
if headerSection && trimmed == "" {
|
|
|
|
|
headerSection = false
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add non-header lines
|
|
|
|
|
if !headerSection {
|
|
|
|
|
// Skip header lines
|
|
|
|
|
if headerSection && (strings.Contains(trimmed, ":") || trimmed == "") {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add non-header lines
|
|
|
|
|
// This condition was originally !headerSection, but if we are past the headers,
|
|
|
|
|
// we should always add the line. If headerSection is still true here, it means
|
|
|
|
|
// it's the first line of content after potential headers were skipped.
|
|
|
|
|
cleanLines = append(cleanLines, line)
|
|
|
|
|
}
|
|
|
|
|
content = strings.Join(cleanLines, "\n")
|
|
|
|
|
logger.WithField("contentAfterHeaderStripLength", len(content)).Debug("cleanMessageContent: Content after header stripping")
|
|
|
|
|
} else {
|
|
|
|
|
logger.Debug("cleanMessageContent: Skipping header stripping")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
content = strings.Join(cleanLines, "\n")
|
|
|
|
|
|
|
|
|
|
// Convert newlines to HTML breaks for display
|
|
|
|
|
content = strings.ReplaceAll(content, "\r\n", "<br>\n")
|
|
|
|
|
// First, normalize all newlines (\r\n or \n) to just \n
|
|
|
|
|
content = strings.ReplaceAll(content, "\r\n", "\n")
|
|
|
|
|
// Then, replace each \n with <br>\n
|
|
|
|
|
content = strings.ReplaceAll(content, "\n", "<br>\n")
|
|
|
|
|
logger.WithField("contentAfterNewlineConversionLength", len(content)).Debug("cleanMessageContent: Content after newline conversion")
|
|
|
|
|
|
|
|
|
|
// Remove any remaining email signature markers
|
|
|
|
|
content = strings.Split(content, "\n-- ")[0]
|
|
|
|
|
content = strings.Split(content, "<br>-- ")[0]
|
|
|
|
|
|
|
|
|
|
return strings.TrimSpace(content)
|
|
|
|
|
finalContent := strings.TrimSpace(content)
|
|
|
|
|
logger.WithField("finalCleanedContentLength", len(finalContent)).Debug("cleanMessageContent: Returning final cleaned content")
|
|
|
|
|
return finalContent
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fallbackExtractContent is the previous implementation used as fallback
|
|
|
|
|
func fallbackExtractContent(body string) string {
|
|
|
|
|
logger.WithField("bodyLength", len(body)).Debug("Using fallback content extraction method")
|
|
|
|
|
logger.WithField("rawInputBodyFallbackLength", len(body)).Debug("fallbackExtractContent: Raw input body")
|
|
|
|
|
var content string
|
|
|
|
|
parts := strings.Split(body, "\r\n\r\n")
|
|
|
|
|
if len(parts) > 1 {
|
|
|
|
|
content := strings.Join(parts[1:], "\r\n\r\n")
|
|
|
|
|
content = strings.ReplaceAll(content, "\r\n", "<br>\n")
|
|
|
|
|
content = strings.ReplaceAll(content, "\n", "<br>\n")
|
|
|
|
|
content = strings.Join(parts[1:], "\r\n\r\n")
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"contentLength": len(content),
|
|
|
|
|
"partsCount": len(parts),
|
|
|
|
|
}).Debug("Successfully extracted content using fallback method")
|
|
|
|
|
return content
|
|
|
|
|
}).Debug("Successfully extracted content using fallback method (from parts)")
|
|
|
|
|
logger.WithField("extractedContentFallbackLength", len(content)).Debug("fallbackExtractContent: Content from splitting parts")
|
|
|
|
|
} else {
|
|
|
|
|
content = body
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"contentLength": len(content),
|
|
|
|
|
"fullBody": true,
|
|
|
|
|
}).Debug("Using full body as content in fallback method")
|
|
|
|
|
logger.WithField("finalContentFallbackLength", len(content)).Debug("fallbackExtractContent: Final content from full body (no parts split)")
|
|
|
|
|
}
|
|
|
|
|
content := body
|
|
|
|
|
content = strings.ReplaceAll(content, "\r\n", "<br>\n")
|
|
|
|
|
content = strings.ReplaceAll(content, "\n", "<br>\n")
|
|
|
|
|
logger.WithFields(logrus.Fields{
|
|
|
|
|
"contentLength": len(content),
|
|
|
|
|
"fullBody": true,
|
|
|
|
|
}).Debug("Using full body as content in fallback method")
|
|
|
|
|
// Newline conversion and signature stripping will be handled by cleanMessageContent,
|
|
|
|
|
// which is now called by the callers of fallbackExtractContent.
|
|
|
|
|
return content
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|