Files
raven/internal/message/message.go

530 lines
13 KiB
Go
Raw Permalink Normal View History

// Package message handles email message parsing and reply composition.
package message
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
"log/slog"
"strings"
"time"
"github.com/emersion/go-imap/v2"
"github.com/emersion/go-imap/v2/imapclient"
_ "github.com/emersion/go-message/charset"
"github.com/emersion/go-message/mail"
openai "github.com/sashabaranov/go-openai"
)
// RFC5322 is the Go time layout for the date-time format specified in
// RFC 5322 §3.3. It is passed to time.Time.Format when building the
// attribution line of a quoted reply.
const RFC5322 = "Mon, 2 Jan 2006 15:04:05 -0700"
// supportedImageTypes is the set of image MIME types supported for vision
// models. A content type that is absent from the map is not an image as far
// as this package is concerned.
var supportedImageTypes = map[string]bool{
	"image/gif":  true,
	"image/jpeg": true,
	"image/png":  true,
	"image/webp": true,
}

// maxPartSize is the maximum size in bytes for a single MIME part (32 MiB).
const maxPartSize = 32 * 1024 * 1024

// Part is a single piece of message content, either text or binary.
type Part struct {
	// Content is the text of a text part; it is empty for images.
	Content string
	// ContentType is the MIME type, e.g. "text/plain" or "image/png".
	ContentType string
	// Data is the raw payload of binary content such as images.
	Data []byte
	// Filename is set for attachment parts.
	Filename string
	// IsAttachment is true for attachments and false for inline content.
	IsAttachment bool
}

// IsImage reports whether the part's content type is one of the supported
// image types. The lookup is an exact, case-sensitive match.
func (p *Part) IsImage() bool {
	supported, found := supportedImageTypes[p.ContentType]
	return found && supported
}

// IsText reports whether the part's content type starts with "text/".
func (p *Part) IsText() bool {
	const textPrefix = "text/"
	return strings.HasPrefix(p.ContentType, textPrefix)
}
// Message represents a parsed email with its metadata and content.
// Values are normally built by New from an IMAP fetch buffer.
type Message struct {
// Attachments are parts with Content-Disposition: attachment.
// Stored separately to allow appending after inline content.
Attachments []Part
// Envelope is the IMAP envelope (sender, date, subject, ...) taken from
// the fetch buffer. Several methods dereference it without a nil check.
Envelope *imap.Envelope
// Parts contains inline content in order of appearance.
Parts []Part
// References are Message-IDs from the References header,
// used for threading.
References []string
// UID is the message UID copied from the fetch buffer.
UID imap.UID
// log is the logger handed to New (slog.Default() when nil was passed).
log *slog.Logger
}
// TextFrom renders the envelope senders as a single "From: ..." line.
//
// Each sender is written as "Name <addr>" or, when it has no display name,
// as the bare address; multiple senders are separated by ", ". It returns
// the empty string when the envelope is missing or lists no senders.
func (msg *Message) TextFrom() string {
	if msg.Envelope == nil {
		return ""
	}
	senders := msg.Envelope.From
	if len(senders) == 0 {
		return ""
	}

	entries := make([]string, 0, len(senders))
	for _, sender := range senders {
		entry := sender.Addr()
		if sender.Name != "" {
			entry = sender.Name + " <" + entry + ">"
		}
		entries = append(entries, entry)
	}
	return "From: " + strings.Join(entries, ", ")
}
// TextBody returns the content of every inline text part, joined with a
// single newline between consecutive parts. Non-text parts and attachments
// are ignored. It is used for reply composition and quoting.
func (msg *Message) TextBody() string {
	texts := make([]string, 0, len(msg.Parts))
	for i := range msg.Parts {
		if part := &msg.Parts[i]; part.IsText() {
			texts = append(texts, part.Content)
		}
	}
	return strings.Join(texts, "\n")
}
// ToOpenAIMessages converts the message content to OpenAI chat message parts.
//
// When an envelope is present, the first part is a text summary holding the
// "From:", "Date:" (RFC 3339) and "Subject:" lines that are available; it is
// omitted if none of them is set. Inline parts follow in their original
// order, then attachments. Text parts become text content, supported images
// become image_url content, and anything else is dropped.
//
// The result is always a non-nil slice. A nil receiver yields an empty
// slice instead of panicking.
func (msg *Message) ToOpenAIMessages() []openai.ChatMessagePart {
	parts := []openai.ChatMessagePart{}
	if msg == nil {
		// Nothing to convert; the loops below would dereference msg.
		return parts
	}

	if msg.Envelope != nil {
		var summary strings.Builder
		if from := msg.TextFrom(); from != "" {
			summary.WriteString(from)
			summary.WriteString("\n")
		}
		if date := msg.Envelope.Date; !date.IsZero() {
			summary.WriteString("Date: " + date.Format(time.RFC3339) + "\n")
		}
		if subject := msg.Envelope.Subject; subject != "" {
			summary.WriteString("Subject: " + subject + "\n")
		}
		if summary.Len() > 0 {
			parts = append(parts, openai.ChatMessagePart{
				Type: openai.ChatMessagePartTypeText,
				Text: summary.String(),
			})
		}
	}

	// Inline parts first, preserving order.
	for _, p := range msg.Parts {
		if converted, ok := convertPart(p); ok {
			parts = append(parts, converted)
		}
	}
	// Attachments go last.
	for _, p := range msg.Attachments {
		if converted, ok := convertPart(p); ok {
			parts = append(parts, converted)
		}
	}
	return parts
}
// convertPart maps a Part onto an OpenAI ChatMessagePart.
//
// Text parts are passed through as text content. Supported images are
// embedded as a base64 "data:" URI with automatic detail level. For any
// other part the second result is false and the first is the zero value.
func convertPart(p Part) (openai.ChatMessagePart, bool) {
	if p.IsText() {
		text := openai.ChatMessagePart{
			Type: openai.ChatMessagePartTypeText,
			Text: p.Content,
		}
		return text, true
	}
	if !p.IsImage() {
		return openai.ChatMessagePart{}, false
	}

	encoded := base64.StdEncoding.EncodeToString(p.Data)
	image := openai.ChatMessagePart{
		Type: openai.ChatMessagePartTypeImageURL,
		ImageURL: &openai.ChatMessageImageURL{
			URL:    "data:" + p.ContentType + ";base64," + encoded,
			Detail: openai.ImageURLDetailAuto,
		},
	}
	return image, true
}
// composeAttribution builds the attribution line that precedes the quoted
// original in a reply, for example:
// "On Mon, 2 Jan 2006 15:04:05 -0700, raven <raven@example.com> wrote:"
//
// Only the first sender is used. Without a date the line is shortened to
// "<sender> wrote:"; without any sender an empty quoted line ("> ") is
// returned. The result always ends in a newline. The envelope must be
// non-nil.
func (msg *Message) composeAttribution() string {
	env := msg.Envelope
	if len(env.From) == 0 {
		return "> \n"
	}

	first := env.From[0]
	who := first.Addr()
	if first.Name != "" {
		who = first.Name + " <" + who + ">"
	}

	if env.Date.IsZero() {
		return who + " wrote:\n"
	}
	return "On " + env.Date.Format(RFC5322) + ", " + who + " wrote:\n"
}
// composeBody assembles the reply body from three pieces: the response text
// res with trailing whitespace removed, a blank line followed by the
// attribution line, and the quoted original message.
func (msg *Message) composeBody(res string) string {
	response := strings.TrimRight(res, "\n\r \t")
	attribution := msg.composeAttribution()
	quoted := msg.QuotedBody()
	return response + "\n\n" + attribution + quoted
}
// composeHeader builds the headers for a reply sent by from at date.
//
// It sets Date, Content-Type (text/plain, UTF-8), From, Reply-To, To,
// Subject and Message-ID, plus In-Reply-To and References when threading
// information is available. The Message-ID uses the domain of from.Address
// when the address contains an "@", and the library default otherwise.
//
// It returns an error when the original message yields no reply recipients
// or when the Message-ID cannot be generated. from must be non-nil.
func (msg *Message) composeHeader(date time.Time, from *mail.Address) (*mail.Header, error) {
	sender := []*mail.Address{from}

	hdr := new(mail.Header)
	hdr.SetDate(date)
	hdr.SetContentType("text/plain", map[string]string{"charset": "utf-8"})
	hdr.SetAddressList("From", sender)
	hdr.SetAddressList("Reply-To", sender)

	recipients := msg.composeRecipients()
	if len(recipients) == 0 {
		return nil, errors.New("missing recipients")
	}
	hdr.SetAddressList("To", recipients)
	hdr.SetSubject(msg.composeSubject())

	// Use sender's domain for Message-ID per RFC 5322 recommendation.
	var idErr error
	if _, domain, hasDomain := strings.Cut(from.Address, "@"); hasDomain {
		idErr = hdr.GenerateMessageIDWithHostname(domain)
	} else {
		idErr = hdr.GenerateMessageID()
	}
	if idErr != nil {
		return nil, fmt.Errorf("generate message id: %w", idErr)
	}

	parent, ancestry := msg.composeReferences()
	if parent != "" {
		hdr.SetMsgIDList("In-Reply-To", []string{parent})
	}
	if len(ancestry) > 0 {
		hdr.SetMsgIDList("References", ancestry)
	}
	return hdr, nil
}
// composeReferences builds threading headers per RFC 5322 §3.6.4.
//
// inReplyTo is the Message-ID of the message being replied to, or "" when
// it has none. refs is a copy of the message's References with that
// Message-ID appended when present; the receiver's slice is never modified.
func (msg *Message) composeReferences() (inReplyTo string, refs []string) {
	// Copy first so the append below cannot write into msg.References.
	ancestry := append([]string(nil), msg.References...)

	parent := msg.Envelope.MessageID
	if parent == "" {
		return "", ancestry
	}
	return parent, append(ancestry, parent)
}
// composeRecipients determines the To addresses for a reply: the original
// Reply-To list if it is non-empty, otherwise the From list (per RFC 5322
// §3.6.2). The result is non-nil and may be empty.
func (msg *Message) composeRecipients() []*mail.Address {
	source := msg.Envelope.From
	if replyTo := msg.Envelope.ReplyTo; len(replyTo) != 0 {
		source = replyTo
	}

	recipients := make([]*mail.Address, len(source))
	for i, addr := range source {
		recipients[i] = &mail.Address{Name: addr.Name, Address: addr.Addr()}
	}
	return recipients
}
// composeSubject returns the reply subject: the original subject unchanged
// if it already starts with "re:" in any letter case, otherwise the original
// prefixed with "Re: ".
func (msg *Message) composeSubject() string {
	subject := msg.Envelope.Subject
	if strings.HasPrefix(strings.ToLower(subject), "re:") {
		return subject
	}
	return "Re: " + subject
}
// ComposeReply creates a reply to this message.
//
// date becomes the reply's Date header, from is used as its sender, and res
// is the response text placed above the quoted original. The reply carries
// threading headers pointing back at this message.
//
// It returns an error when the receiver or its envelope is nil, when from is
// nil or has an empty address, or when the headers cannot be composed. The
// header error is wrapped with %w so callers can inspect the cause with
// errors.Is / errors.As.
func (msg *Message) ComposeReply(
	date time.Time, from *mail.Address, res string,
) (*Reply, error) {
	if msg == nil || msg.Envelope == nil {
		return nil, errors.New("missing envelope")
	}
	if from == nil || from.Address == "" {
		return nil, errors.New("missing from address")
	}
	header, err := msg.composeHeader(date, from)
	if err != nil {
		return nil, fmt.Errorf("compose header: %w", err)
	}
	return &Reply{body: msg.composeBody(res), header: header}, nil
}
// QuotedBody returns the message text body with every line prefixed by
// "> " and terminated by "\n". A trailing "\r" on a line is dropped, so
// CRLF input is normalised to LF. An empty body produces one empty quoted
// line.
func (msg *Message) QuotedBody() string {
	lines := strings.Split(msg.TextBody(), "\n")

	var out strings.Builder
	for _, line := range lines {
		out.WriteString("> " + strings.TrimSuffix(line, "\r") + "\n")
	}
	return out.String()
}
// New creates a Message from an IMAP fetch buffer.
//
// The buffer must be non-nil and carry a non-zero UID and an envelope;
// otherwise an error is returned with a nil message. A nil log falls back to
// slog.Default().
//
// Body sections are tried in order, skipping empty ones, until one parses
// and leaves at least one inline part. If no inline part was collected and
// a section failed to parse, the last parse error is returned together with
// the message built so far.
func New(mb *imapclient.FetchMessageBuffer, log *slog.Logger) (*Message, error) {
	switch {
	case mb == nil:
		return nil, errors.New("nil message buffer")
	case mb.UID == 0:
		return nil, errors.New("message has no UID")
	case mb.Envelope == nil:
		return nil, errors.New("message has no envelope")
	}
	if log == nil {
		log = slog.Default()
	}

	msg := &Message{Envelope: mb.Envelope, UID: mb.UID, log: log}

	var lastErr error
	for _, section := range mb.BodySection {
		if len(section.Bytes) == 0 {
			continue
		}
		err := parseBody(msg, bytes.NewReader(section.Bytes), log)
		if err != nil {
			// Remember the failure but keep trying the remaining sections.
			lastErr = err
			continue
		}
		if len(msg.Parts) > 0 {
			break
		}
	}

	if lastErr != nil && len(msg.Parts) == 0 {
		return msg, fmt.Errorf("parse body: %w", lastErr)
	}
	return msg, nil
}
// parseBody extracts content from a MIME message body read from r and
// stores it on msg.
// mail.Reader automatically flattens nested multipart structures, returning
// only leaf parts (text/plain, attachments, etc.).
//
// A non-empty References header replaces msg.References. Each part is handed
// to processPart; a part that fails there is logged at debug level and
// skipped. An error is returned only when the reader cannot be created or
// advancing to the next part fails.
func parseBody(msg *Message, r io.Reader, log *slog.Logger) error {
	reader, err := mail.CreateReader(r)
	if err != nil {
		return fmt.Errorf("create reader: %w", err)
	}
	defer reader.Close()

	// Extract References header for threading.
	if ids, refErr := reader.Header.MsgIDList("References"); refErr == nil && len(ids) > 0 {
		msg.References = ids
	}

	for {
		part, nextErr := reader.NextPart()
		switch {
		case errors.Is(nextErr, io.EOF):
			return nil
		case nextErr != nil:
			return fmt.Errorf("next part: %w", nextErr)
		}

		if partErr := processPart(msg, part, log); partErr != nil {
			log.Debug("skipped part",
				slog.Any("error", partErr),
			)
		}
	}
}
// processPart handles a single MIME part returned by mail.Reader.
//
// Inline text and supported images are appended to msg.Parts; text and
// supported-image attachments are appended to msg.Attachments. A part of any
// other content type is skipped with a debug log before its body is read, so
// unsupported payloads are never buffered just to be thrown away. Parts whose
// header is neither inline nor attachment are ignored.
//
// At most maxPartSize bytes of a body are kept. An oversized text part is
// truncated to that limit and a warning is logged. An oversized image is
// rejected with an error, because a cut-off image cannot be decoded.
//
// It returns an error when the body cannot be read or an image exceeds
// maxPartSize.
func processPart(msg *Message, part *mail.Part, log *slog.Logger) error {
	var (
		ct         string
		filename   string
		attachment bool
	)
	switch h := part.Header.(type) {
	case *mail.InlineHeader:
		var err error
		if ct, _, err = h.Header.ContentType(); err != nil {
			// Inline content with an unparsable Content-Type is treated as text.
			ct = "text/plain"
		}
	case *mail.AttachmentHeader:
		attachment = true
		// Best effort: a missing filename stays empty, and an unparsable
		// Content-Type leaves ct empty so the part is skipped below.
		filename, _ = h.Filename()
		ct, _, _ = h.Header.ContentType()
	default:
		return nil
	}

	// Only keep content we can use (text or supported images).
	isText := strings.HasPrefix(ct, "text/")
	if !isText && !supportedImageTypes[ct] {
		if attachment {
			log.Debug("skipped unsupported attachment type",
				slog.String("content_type", ct),
				slog.String("filename", filename),
			)
		} else {
			log.Debug("skipped unsupported inline content type",
				slog.String("content_type", ct),
			)
		}
		return nil
	}

	// Read one byte beyond the limit so an oversized part can be told apart
	// from one that is exactly maxPartSize long.
	body, err := io.ReadAll(io.LimitReader(part.Body, maxPartSize+1))
	if err != nil {
		if attachment {
			return fmt.Errorf("read attachment: %w", err)
		}
		return fmt.Errorf("read body: %w", err)
	}
	if len(body) > maxPartSize {
		if !isText {
			return fmt.Errorf("%s part exceeds %d bytes", ct, maxPartSize)
		}
		body = body[:maxPartSize]
		log.Warn("truncated oversized text part",
			slog.String("content_type", ct),
			slog.String("filename", filename),
			slog.Int("max_bytes", maxPartSize),
		)
	}

	p := Part{
		ContentType:  ct,
		Filename:     filename,
		IsAttachment: attachment,
	}
	if isText {
		p.Content = string(body)
	} else {
		p.Data = body
	}

	if attachment {
		msg.Attachments = append(msg.Attachments, p)
	} else {
		msg.Parts = append(msg.Parts, p)
	}
	return nil
}
// Reply holds a composed reply ready for sending.
// Values are produced by Message.ComposeReply.
type Reply struct {
// body is the plain-text reply body: the response text followed by the
// attribution line and the quoted original message.
body string
// header holds the reply's mail headers. Bytes dereferences it, so it
// must be non-nil.
header *mail.Header
}
// Bytes serializes the reply to RFC 5322 wire format: the headers followed
// by the body as a single inline part.
//
// It returns an error when the writer cannot be created, the body cannot be
// written, or the writer fails to close.
func (r *Reply) Bytes() ([]byte, error) {
	out := new(bytes.Buffer)

	w, err := mail.CreateSingleInlineWriter(out, *r.header)
	if err != nil {
		return nil, fmt.Errorf("create writer: %w", err)
	}
	if _, err = w.Write([]byte(r.body)); err != nil {
		return nil, fmt.Errorf("write body: %w", err)
	}
	// Close before reading the buffer so the writer can finish its output.
	if err = w.Close(); err != nil {
		return nil, fmt.Errorf("close writer: %w", err)
	}
	return out.Bytes(), nil
}
// Recipients returns the bare email addresses (without display names) from
// the reply's To header, in header order.
//
// It returns an error when the To header cannot be parsed as an address
// list.
func (r *Reply) Recipients() ([]string, error) {
	list, err := r.header.AddressList("To")
	if err != nil {
		return nil, fmt.Errorf("address list: %w", err)
	}

	addresses := make([]string, 0, len(list))
	for _, addr := range list {
		addresses = append(addresses, addr.Address)
	}
	return addresses, nil
}