parser.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*
  2. * MIT License
  3. *
  4. * Copyright (c) 2020 Alexey Edelev <semlanik@gmail.com>
  5. *
  6. * This file is part of gostfix project https://git.semlanik.org/semlanik/gostfix
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy of this
  9. * software and associated documentation files (the "Software"), to deal in the Software
  10. * without restriction, including without limitation the rights to use, copy, modify,
  11. * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
  12. * to permit persons to whom the Software is furnished to do so, subject to the following
  13. * conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in all copies
  16. * or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  19. * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  20. * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
  21. * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  22. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23. * DEALINGS IN THE SOFTWARE.
  24. */
  25. package scanner
import (
	"bufio"
	"bytes"
	"encoding/hex"
	"log"
	netmail "net/mail"
	"os"
	"strings"
	"time"

	"git.semlanik.org/semlanik/gostfix/common"
	"git.semlanik.org/semlanik/gostfix/config"
	utils "git.semlanik.org/semlanik/gostfix/utils"
	"github.com/google/uuid"
	enmime "github.com/jhillyerd/enmime"
)
  40. const (
  41. StateHeaderScan = iota
  42. StateBodyScan
  43. )
  44. const (
  45. AtLeastOneHeaderMask = 1 << iota
  46. FromHeaderMask
  47. DateHeaderMask
  48. ToHeaderMask
  49. AllHeaderMask = 15
  50. )
  51. type parseData struct {
  52. state int
  53. mandatoryHeaders int
  54. previousHeader *string
  55. email *common.Mail
  56. bodyContentType string
  57. bodyData string
  58. activeBoundary string
  59. }
  60. func (pd *parseData) reset() {
  61. *pd = parseData{
  62. state: StateHeaderScan,
  63. previousHeader: nil,
  64. mandatoryHeaders: 0,
  65. email: NewEmail(),
  66. bodyContentType: "plain/text",
  67. bodyData: "",
  68. activeBoundary: "",
  69. }
  70. }
  71. func parseFile(file *utils.LockedFile) []*common.Mail {
  72. var emails []*common.Mail
  73. pd := &parseData{}
  74. pd.reset()
  75. scanner := bufio.NewScanner(file)
  76. for scanner.Scan() {
  77. switch pd.state {
  78. case StateHeaderScan:
  79. if scanner.Text() == "" {
  80. if pd.mandatoryHeaders&AtLeastOneHeaderMask == AtLeastOneHeaderMask { //Cause we read at least one header
  81. pd.previousHeader = nil
  82. boundaryCapture := utils.RegExpUtilsInstance().BoundaryFinder.FindStringSubmatch(pd.bodyContentType)
  83. if len(boundaryCapture) == 2 {
  84. pd.activeBoundary = boundaryCapture[1]
  85. } else {
  86. pd.activeBoundary = ""
  87. }
  88. pd.state = StateBodyScan
  89. }
  90. } else {
  91. pd.parseHeader(scanner.Text())
  92. }
  93. case StateBodyScan:
  94. if scanner.Text() == "" {
  95. if pd.state == StateBodyScan && pd.activeBoundary == "" {
  96. if pd.mandatoryHeaders == AllHeaderMask {
  97. pd.parseBody()
  98. emails = append(emails, pd.email)
  99. }
  100. pd.reset()
  101. continue
  102. }
  103. }
  104. if pd.activeBoundary != "" {
  105. pd.bodyData += scanner.Text() + "\n"
  106. capture := utils.RegExpUtilsInstance().BoundaryEndFinder.FindStringSubmatch(scanner.Text())
  107. if len(capture) == 2 && pd.activeBoundary == capture[1] {
  108. pd.state = StateBodyScan
  109. pd.activeBoundary = ""
  110. }
  111. }
  112. }
  113. }
  114. if pd.state == StateBodyScan {
  115. if pd.mandatoryHeaders == AllHeaderMask {
  116. pd.parseBody()
  117. emails = append(emails, pd.email)
  118. }
  119. pd.reset()
  120. }
  121. return emails
  122. }
  123. func (pd *parseData) parseHeader(headerRaw string) {
  124. capture := utils.RegExpUtilsInstance().HeaderFinder.FindStringSubmatch(headerRaw)
  125. //Parse header
  126. if len(capture) == 3 {
  127. // fmt.Printf("capture Header %s : %s\n", strings.ToLower(capture[0]), strings.ToLower(capture[1]))
  128. header := strings.ToLower(capture[1])
  129. pd.mandatoryHeaders |= AtLeastOneHeaderMask
  130. switch header {
  131. case "from":
  132. pd.previousHeader = &pd.email.Header.From
  133. pd.mandatoryHeaders |= FromHeaderMask
  134. case "to":
  135. pd.previousHeader = &pd.email.Header.To
  136. pd.mandatoryHeaders |= ToHeaderMask
  137. case "cc":
  138. pd.previousHeader = &pd.email.Header.Cc
  139. case "bcc":
  140. pd.previousHeader = &pd.email.Header.Bcc
  141. pd.mandatoryHeaders |= ToHeaderMask
  142. case "subject":
  143. pd.previousHeader = &pd.email.Header.Subject
  144. case "date":
  145. pd.previousHeader = nil
  146. time, err := time.Parse(time.RFC1123Z, strings.Trim(capture[2], " \t"))
  147. if err == nil {
  148. pd.email.Header.Date = time.Unix()
  149. pd.mandatoryHeaders |= DateHeaderMask
  150. } else {
  151. log.Printf("Invalid date format %s, %s", strings.Trim(capture[2], " \t"), err)
  152. }
  153. case "content-type":
  154. pd.previousHeader = &pd.bodyContentType
  155. default:
  156. pd.previousHeader = nil
  157. }
  158. if pd.previousHeader != nil {
  159. *pd.previousHeader = strings.Trim(capture[2], " \t")
  160. }
  161. return
  162. }
  163. //Parse folding
  164. capture = utils.RegExpUtilsInstance().FoldingFinder.FindStringSubmatch(headerRaw)
  165. if len(capture) == 2 && pd.previousHeader != nil {
  166. *pd.previousHeader += capture[1]
  167. }
  168. }
  169. func (pd *parseData) parseBody() {
  170. buffer := bytes.NewBufferString("content-type:" + pd.bodyContentType + "\n\n" + pd.bodyData)
  171. en, err := enmime.ReadEnvelope(buffer)
  172. if err != nil {
  173. log.Printf("Unable to read mail body %s\n\nBody content: %s\n\n", err, pd.bodyData)
  174. return
  175. }
  176. pd.email.Body = &common.MailBody{}
  177. pd.email.Body.PlainText = en.Text
  178. pd.email.Body.RichText = en.HTML
  179. for _, attachment := range en.Attachments {
  180. uuid := uuid.New()
  181. fileName := hex.EncodeToString(uuid[:])
  182. attachmentFile, err := os.Create(config.ConfigInstance().AttachmentsPath + "/" + fileName)
  183. log.Printf("Attachment found %s\n", fileName)
  184. if err != nil {
  185. log.Printf("Unable to save attachment %s %s\n", fileName, err)
  186. continue
  187. }
  188. pd.email.Body.Attachments = append(pd.email.Body.Attachments, &common.AttachmentHeader{
  189. Id: fileName,
  190. FileName: attachment.FileName,
  191. ContentType: attachment.ContentType,
  192. })
  193. attachmentFile.Write(attachment.Content)
  194. }
  195. }