Browse Source

Add parsing of encoded subject and mailer

- Add parsing of base64/quoted-printable encoded subject
- Make the SASL socket wait indefinitely (no read deadline) to avoid EOF issues
- Change font to support Cyrillic
- Make header labels non-selectable
- Read the Content-Transfer-Encoding header so that encoded
  content can be parsed
Alexey Edelev 5 years ago
parent
commit
eb5a8ffe94
7 changed files with 79 additions and 46 deletions
  1. 1 1
      go.mod
  2. 2 0
      go.sum
  3. 3 0
      sasl/sasl.go
  4. 58 41
      scanner/parser.go
  5. 9 0
      utils/regexp.go
  6. 3 1
      web/css/styles.css
  7. 3 3
      web/templates/details.html

+ 1 - 1
go.mod

@@ -6,7 +6,7 @@ require (
 	github.com/amsokol/protoc-gen-gotag v0.2.1 // indirect
 	github.com/fatih/structtag v1.2.0 // indirect
 	github.com/fsnotify/fsnotify v1.4.7
-	github.com/golang/protobuf v1.3.4
+	github.com/golang/protobuf v1.3.5
 	github.com/google/uuid v1.1.1
 	github.com/gorilla/sessions v1.2.0
 	github.com/gorilla/websocket v1.4.1

+ 2 - 0
go.sum

@@ -42,6 +42,8 @@ github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561 h1:aBzukfDxQlCTVS0NBU
 github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
 github.com/golang/protobuf v1.3.4 h1:87PNWwrRvUSnqS4dlcBU/ftvOIBep4sYuBLlh6rX2wk=
 github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
+github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
 github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
 github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=

+ 3 - 0
sasl/sasl.go

@@ -38,6 +38,7 @@ import (
 	"os"
 	"strconv"
 	"strings"
+	"time"
 
 	"git.semlanik.org/semlanik/gostfix/auth"
 	"github.com/google/uuid"
@@ -104,6 +105,8 @@ func (s *SaslServer) Run() {
 }
 
 func (s *SaslServer) handleRequest(conn net.Conn) {
+	conn.SetReadDeadline(time.Time{})
+
 	connectionReader := bufio.NewReader(conn)
 	continueState := ContinueStateNone
 	for {

+ 58 - 41
scanner/parser.go

@@ -28,13 +28,14 @@ package scanner
 import (
 	"bufio"
 	"bytes"
+	"encoding/base64"
 	"encoding/hex"
-	"errors"
 	"fmt"
+	"io/ioutil"
 	"log"
+	"mime/quotedprintable"
 	"os"
 	"strings"
-	"time"
 
 	"net/mail"
 
@@ -59,13 +60,14 @@ const (
 )
 
 type parseData struct {
-	state            int
-	mandatoryHeaders int
-	previousHeader   *string
-	email            *common.Mail
-	bodyContentType  string
-	bodyData         string
-	activeBoundary   string
+	state                   int
+	mandatoryHeaders        int
+	previousHeader          *string
+	email                   *common.Mail
+	contentTransferEncoding string
+	bodyContentType         string
+	bodyData                string
+	activeBoundary          string
 }
 
 func (pd *parseData) reset() {
@@ -91,8 +93,6 @@ func parseFile(file *utils.LockedFile) []*common.Mail {
 
 	scanner := bufio.NewScanner(file)
 	for scanner.Scan() {
-		log.Println("Scan next line")
-
 		currentText := scanner.Text()
 		if utils.RegExpUtilsInstance().MailIndicator.MatchString(currentText) {
 			if pd.mandatoryHeaders == AllHeaderMask {
@@ -116,30 +116,24 @@ func parseFile(file *utils.LockedFile) []*common.Mail {
 						pd.activeBoundary = ""
 					}
 					pd.state = StateBodyScan
+					//Header postprocessing
+					address, err := mail.ParseAddress(pd.email.Header.From)
+					if err == nil {
+						pd.email.Header.From = address.Name + "<" + address.Address + ">"
+					} else {
+						fmt.Printf("Unable to parse from email: %s", err)
+					}
 				}
 			} else {
 				pd.parseHeader(currentText)
 			}
 		case StateBodyScan:
-			// if currentText == "" {
-			// 	if pd.state == StateBodyScan && pd.activeBoundary == "" {
-			// 		if pd.mandatoryHeaders == AllHeaderMask {
-			// 			pd.parseBody()
-			// 			emails = append(emails, pd.email)
-			// 		}
-			// 		pd.reset()
-			// 		continue
-			// 	}
-			// }
-
-			// if pd.activeBoundary != "" {
 			pd.bodyData += currentText + "\n"
 			capture := utils.RegExpUtilsInstance().BoundaryEndFinder.FindStringSubmatch(currentText)
 			if len(capture) == 2 && pd.activeBoundary == capture[1] {
 				pd.state = StateBodyScan
 				pd.activeBoundary = ""
 			}
-			// }
 		}
 	}
 
@@ -155,6 +149,7 @@ func parseFile(file *utils.LockedFile) []*common.Mail {
 
 func (pd *parseData) parseHeader(headerRaw string) {
 	capture := utils.RegExpUtilsInstance().HeaderFinder.FindStringSubmatch(headerRaw)
+	encoded := false
 	//Parse header
 	if len(capture) == 3 {
 		// fmt.Printf("capture Header %s : %s\n", strings.ToLower(capture[0]), strings.ToLower(capture[1]))
@@ -178,17 +173,20 @@ func (pd *parseData) parseHeader(headerRaw string) {
 			pd.previousHeader = &pd.email.Header.Bcc
 			pd.mandatoryHeaders |= ToHeaderMask
 		case "subject":
+			encoded = true
 			pd.previousHeader = &pd.email.Header.Subject
 		case "date":
 			pd.previousHeader = nil
 
-			unixTime, err := mail.ParseDate(strings.Trim(capture[2], " \t")) //parseDate(strings.Trim(capture[2], " \t"))
+			unixTime, err := mail.ParseDate(strings.Trim(capture[2], " \t"))
 			if err == nil {
 				pd.email.Header.Date = unixTime.Unix()
 				pd.mandatoryHeaders |= DateHeaderMask
 			} else {
 				log.Printf("Unable to parse message: %s\n", err)
 			}
+		case "content-transfer-encoding":
+			pd.previousHeader = &pd.contentTransferEncoding
 		case "content-type":
 			pd.previousHeader = &pd.bodyContentType
 		default:
@@ -197,6 +195,9 @@ func (pd *parseData) parseHeader(headerRaw string) {
 
 		if pd.previousHeader != nil {
 			*pd.previousHeader = strings.Trim(capture[2], " \t")
+			if encoded {
+				*pd.previousHeader = decodeEncoded(*pd.previousHeader)
+			}
 		}
 		return
 	}
@@ -204,12 +205,12 @@ func (pd *parseData) parseHeader(headerRaw string) {
 	//Parse folding
 	capture = utils.RegExpUtilsInstance().FoldingFinder.FindStringSubmatch(headerRaw)
 	if len(capture) == 2 && pd.previousHeader != nil {
-		*pd.previousHeader += capture[1]
+		*pd.previousHeader += decodeEncoded(strings.Trim(capture[1], " \t"))
 	}
 }
 
 func (pd *parseData) parseBody() {
-	buffer := bytes.NewBufferString("content-type:" + pd.bodyContentType + "\n\n" + pd.bodyData)
+	buffer := bytes.NewBufferString("content-transfer-encoding: " + pd.contentTransferEncoding + "\ncontent-type: " + pd.bodyContentType + "\n\n" + pd.bodyData)
 	en, err := enmime.ReadEnvelope(buffer)
 	if err != nil {
 		log.Printf("Unable to read mail body %s\n\nBody content: %s\n\n", err, pd.bodyData)
@@ -239,21 +240,37 @@ func (pd *parseData) parseBody() {
 	}
 }
 
-func parseDate(stringDate string) (int64, error) {
-	formatsToTest := []string{
-		"Mon, _2 Jan 2006 15:04:05 -0700",
-		time.RFC1123Z,
-		time.RFC1123,
-		time.UnixDate,
-		"Mon,  _2 Jan 2006 15:04:05 -0700 (MST)",
-		"Mon, _2 Jan 2006 15:04:05 -0700 (MST)"}
-	var err error
-	for _, format := range formatsToTest {
-		dateTime, err := time.Parse(format, stringDate)
-		if err == nil {
-			return dateTime.Unix(), nil
+func decodeEncoded(dataEncoded string) string {
+	dataParts := utils.RegExpUtilsInstance().EncodedStringFinder.FindAllString(dataEncoded, -1)
+	if len(dataParts) <= 0 {
+		return dataEncoded
+	}
+
+	var decodedBuffer []byte
+	for _, headerPart := range dataParts {
+		headerPart = headerPart[2 : len(headerPart)-2]
+		headerPartParts := strings.Split(headerPart, "?")
+		if len(headerPartParts) == 3 {
+			switch strings.ToLower(headerPartParts[1]) {
+			case "b":
+				fmt.Printf("Decode base64: %s\n", headerPartParts[2])
+				decodedBase64, err := base64.StdEncoding.DecodeString(headerPartParts[2])
+				if err == nil {
+					decodedBuffer = append(decodedBuffer, decodedBase64...)
+				}
+			case "q":
+				decodedQuotedPrintable, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(headerPartParts[2])))
+				if err == nil {
+					decodedBuffer = append(decodedBuffer, decodedQuotedPrintable...)
+				}
+			default:
+			}
 		}
 	}
 
-	return 0, errors.New("Invalid date format " + stringDate + " , " + err.Error())
+	if len(decodedBuffer) > 0 {
+		//TODO: check encoding here
+		return string(decodedBuffer)
+	}
+	return dataEncoded
 }

+ 9 - 0
utils/regexp.go

@@ -40,6 +40,7 @@ const (
 	BoundaryRegExp      = "boundary=\"(.*)\""
 	MailboxRegExp       = "^/m(\\d+)/?(.*)"
 	FullNameRegExp      = "^[\\w]+[\\w ]*$"
+	EncodedStringRegExp = "=\\?.+\\?="
 )
 
 const (
@@ -74,6 +75,7 @@ type regExpUtils struct {
 	BoundaryFinder      *regexp.Regexp
 	MailboxFinder       *regexp.Regexp
 	FullNameChecker     *regexp.Regexp
+	EncodedStringFinder *regexp.Regexp
 }
 
 func newRegExpUtils() (*regExpUtils, error) {
@@ -137,6 +139,12 @@ func newRegExpUtils() (*regExpUtils, error) {
 		return nil, err
 	}
 
+	encodedString, err := regexp.Compile(EncodedStringRegExp)
+	if err != nil {
+		log.Fatalf("Invalid regexp %s\n", err)
+		return nil, err
+	}
+
 	ru := &regExpUtils{
 		MailIndicator:       mailIndicator,
 		EmailChecker:        emailChecker,
@@ -148,6 +156,7 @@ func newRegExpUtils() (*regExpUtils, error) {
 		DomainChecker:       domainChecker,
 		MailboxFinder:       mailboxFinder,
 		FullNameChecker:     fullNameChecker,
+		EncodedStringFinder: encodedString,
 	}
 
 	return ru, nil

+ 3 - 1
web/css/styles.css

@@ -1,5 +1,7 @@
+@import url('https://fonts.googleapis.com/css?family=Nunito&display=swap');
+
 * {
-    font-family: 'Titillium Web';
+    font-family: 'Nunito';
 }
 
 :root {

+ 3 - 3
web/templates/details.html

@@ -2,9 +2,9 @@
     <div class="horizontalPaddingBox" style="flex-grow: 0!important;">
         <div style="width: 100%; display: flex; flex-direction: row;">
             <div class="elidedText" style="display: block; flex: 1 1 auto;">
-                <span class="primaryText">From: {{.From}}</span></br>
-                <span class="secondaryText">To: {{.To}}</span></br>
-                <span class="primaryText">Subject: {{.Subject}}</span></br>
+                <span class="primaryText"><span class="noselect">From: </span>{{.From}}</span></br>
+                <span class="secondaryText"><span class="noselect">To: </span>{{.To}}</span></br>
+                <span class="primaryText"><span class="noselect">Subject: </span>{{.Subject}}</span></br>
             </div>
             <img id="readIcon{{.MailId}}" class="iconBtn" style="width: 20px; margin-right: 10px;" onclick="toggleRead('{{.MailId}}', 'readIcon');" src="/assets/read.svg"/>
             <img id="restoreIcon" class="iconBtn" style="display:{{if .Trash}}block{{else}}none{{end}}; width: 20px; margin-right: 10px;" onclick="restoreMail({{.MailId}}, closeDetails);" src="/assets/restore.svg"/>