From b7e4c8d6ac5825b7964d1d341731328220ae1412 Mon Sep 17 00:00:00 2001 From: Dominik Polakovics Date: Thu, 13 Nov 2025 11:11:53 +0100 Subject: [PATCH] feat: better handling of oversized mails, and add junk option --- cmd/paraclub-ai-mailer/main.go | 55 ++++++++++++++++++++++++++++++---- config.yaml.example | 4 +++ config/config.go | 44 ++++++++++++++++----------- internal/imap/imap.go | 6 ++-- 4 files changed, 82 insertions(+), 27 deletions(-) diff --git a/cmd/paraclub-ai-mailer/main.go b/cmd/paraclub-ai-mailer/main.go index ab9d94f..dc80d5e 100644 --- a/cmd/paraclub-ai-mailer/main.go +++ b/cmd/paraclub-ai-mailer/main.go @@ -116,22 +116,64 @@ func processEmails(imapClient *imap.IMAPClient, fetcher *fetcher.Fetcher, aiProc logger.WithField("contextCount", len(contextContent)).Debug("Successfully fetched context content") // Process each email - var processedCount, errorCount int + var processedCount, errorCount, skippedCount int for _, email := range emails { + emailBodySize := len(email.Body) logger.WithFields(logrus.Fields{ - "subject": email.Subject, - "from": email.From, - "messageId": email.ID, + "subject": email.Subject, + "from": email.From, + "messageId": email.ID, + "bodySizeBytes": emailBodySize, }).Info("Processing email") - // Generate AI response - response, err := aiProcessor.GenerateReply(email.Body, contextContent) + // Check email size limit + if cfg.Processing.MaxEmailSizeBytes > 0 && emailBodySize > cfg.Processing.MaxEmailSizeBytes { + logger.WithFields(logrus.Fields{ + "subject": email.Subject, + "from": email.From, + "bodySizeBytes": emailBodySize, + "maxSizeBytes": cfg.Processing.MaxEmailSizeBytes, + }).Warn("Email body exceeds size limit, skipping") + skippedCount++ + + // Mark as AI-processed to prevent reprocessing + if markErr := imapClient.MarkAsAIProcessed(email); markErr != nil { + logger.WithFields(logrus.Fields{ + "subject": email.Subject, + "error": markErr, + }).Error("Failed to mark oversized email as AI-processed") + } + continue + } + + // Extract clean email content (remove MIME boundaries, headers, etc.) + cleanEmailContent := imap.ExtractMessageContent(email.Body) + cleanContentSize := len(cleanEmailContent) + logger.WithFields(logrus.Fields{ + "subject": email.Subject, + "rawSize": emailBodySize, + "cleanSize": cleanContentSize, + "sizeReduction": emailBodySize - cleanContentSize, + }).Debug("Extracted clean email content") + + // Generate AI response with clean content + response, err := aiProcessor.GenerateReply(cleanEmailContent, contextContent) if err != nil { logger.WithFields(logrus.Fields{ "subject": email.Subject, "error": err, }).Error("Failed to generate reply") errorCount++ + + // Mark as AI-processed even on failure to prevent infinite retry loop + if markErr := imapClient.MarkAsAIProcessed(email); markErr != nil { + logger.WithFields(logrus.Fields{ + "subject": email.Subject, + "error": markErr, + }).Error("Failed to mark failed email as AI-processed") + } else { + logger.WithField("subject", email.Subject).Info("Marked failed email as AI-processed to prevent reprocessing") + } continue } logger.WithField("responseLength", len(response)).Debug("Generated AI response") @@ -180,5 +222,6 @@ func processEmails(imapClient *imap.IMAPClient, fetcher *fetcher.Fetcher, aiProc "totalEmails": len(emails), "processed": processedCount, "errors": errorCount, + "skipped": skippedCount, }).Info("Completed email processing cycle") } diff --git a/config.yaml.example b/config.yaml.example index 61611d3..7d24cd6 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -23,6 +23,10 @@ context: polling: interval: "5m" # Examples: "30s", "1m", "1h" +processing: + max_email_size_bytes: 102400 # Maximum email body size in bytes (100KB), 0 = no limit + skip_junk_emails: false # Skip emails marked as junk/spam (not yet implemented) + logging: level: "info" # Options: "debug", "info", "warn", "error" file_path: "paraclub-ai-mailer.log" \ No newline at end of file diff --git a/config/config.go b/config/config.go index 7c1e37c..1606039 100644 --- a/config/config.go +++ b/config/config.go @@ -11,11 +11,12 @@ import ( ) type Config struct { - IMAP IMAPConfig `yaml:"imap"` - AI AIConfig `yaml:"ai"` - Context ContextConfig `yaml:"context"` - Polling PollingConfig `yaml:"polling"` - Logging LoggingConfig `yaml:"logging"` + IMAP IMAPConfig `yaml:"imap"` + AI AIConfig `yaml:"ai"` + Context ContextConfig `yaml:"context"` + Polling PollingConfig `yaml:"polling"` + Logging LoggingConfig `yaml:"logging"` + Processing ProcessingConfig `yaml:"processing"` } type IMAPConfig struct { @@ -49,6 +50,11 @@ type LoggingConfig struct { FilePath string `yaml:"file_path"` } +type ProcessingConfig struct { + MaxEmailSizeBytes int `yaml:"max_email_size_bytes"` // Maximum email body size in bytes (0 = no limit) + SkipJunkEmails bool `yaml:"skip_junk_emails"` // Skip emails marked as junk/spam +} + func readFileContent(path string) (string, error) { content, err := os.ReadFile(path) if err != nil { @@ -106,19 +112,21 @@ func Load(path string) (*Config, error) { } logger.WithFields(logrus.Fields{ - "imapServer": config.IMAP.Server, - "imapPort": config.IMAP.Port, - "imapUsername": config.IMAP.Username, - "imapMailboxIn": config.IMAP.MailboxIn, - "imapDraftBox": config.IMAP.DraftBox, - "imapUseTLS": config.IMAP.UseTLS, - "aiModel": config.AI.Model, - "aiTemperature": config.AI.Temperature, - "aiMaxTokens": config.AI.MaxTokens, - "contextUrlCount": len(config.Context.URLs), - "pollingInterval": config.Polling.Interval, - "loggingLevel": config.Logging.Level, - "loggingFilePath": config.Logging.FilePath, + "imapServer": config.IMAP.Server, + "imapPort": config.IMAP.Port, + "imapUsername": config.IMAP.Username, + "imapMailboxIn": config.IMAP.MailboxIn, + "imapDraftBox": config.IMAP.DraftBox, + "imapUseTLS": config.IMAP.UseTLS, + "aiModel": config.AI.Model, + "aiTemperature": config.AI.Temperature, + "aiMaxTokens": config.AI.MaxTokens, + "contextUrlCount": len(config.Context.URLs), + "pollingInterval": config.Polling.Interval, + "loggingLevel": config.Logging.Level, + "loggingFilePath": config.Logging.FilePath, + "maxEmailSizeBytes": config.Processing.MaxEmailSizeBytes, + "skipJunkEmails": config.Processing.SkipJunkEmails, }).Debug("Configuration loaded successfully") return &config, nil diff --git a/internal/imap/imap.go b/internal/imap/imap.go index 6daf25e..c233dc6 100644 --- a/internal/imap/imap.go +++ b/internal/imap/imap.go @@ -266,7 +266,7 @@ func (ic *IMAPClient) SaveDraft(email Email, response string) (err error) { email.From, email.Subject, email.Date.Format("Mon, 02 Jan 2006 15:04:05 -0700"), - extractMessageContent(email.Body)) + ExtractMessageContent(email.Body)) literal := &MessageLiteral{ content: []byte(draft), @@ -287,9 +287,9 @@ func (ic *IMAPClient) SaveDraft(email Email, response string) (err error) { return nil } -// extractMessageContent attempts to extract just the message content +// ExtractMessageContent attempts to extract just the message content // by removing email headers and MIME boundaries -func extractMessageContent(body string) string { +func ExtractMessageContent(body string) string { logger.WithField("bodyLength", len(body)).Debug("Starting message content extraction") logger.WithField("rawInputBody", body).Debug("extractMessageContent: Raw input body") msg, err := mail.ReadMessage(strings.NewReader(body))