From 84a56429ed8588523a5251f02b5b66ab30fabfdb Mon Sep 17 00:00:00 2001
From: Sebb <sebb@apache.org>
Date: Wed, 22 Feb 2023 15:30:46 +0000
Subject: [PATCH 1/4] Initial stab at improving folding

---
 .github/workflows/test.yml                  | 16 ++++----
 lib/mail/fields/unstructured_field.rb       | 18 +++++----
 spec/mail/encodings_spec.rb                 | 32 +++++++++++----
 spec/mail/fields/unstructured_field_spec.rb | 43 +++++++++++++++++++--
 4 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 237c9c150..e71bd4395 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,14 +32,14 @@ jobs:
            timeout: 50
          - ruby: truffleruby-head
            timeout: 50
-         - ruby: jruby
-           timeout: 5
-         - ruby: jruby-head
-           timeout: 5
-         - ruby: jruby-9.4
-           timeout: 5
-         - ruby: jruby-9.3
-           timeout: 5
+        #  - ruby: jruby
+        #    timeout: 5
+        #  - ruby: jruby-head
+        #    timeout: 5
+        #  - ruby: jruby-9.4
+        #    timeout: 5
+        #  - ruby: jruby-9.3
+        #    timeout: 5
          - ruby: jruby-9.2
            timeout: 5
     steps:
diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
index 53e27c2c9..bef3a9d34 100644
--- a/lib/mail/fields/unstructured_field.rb
+++ b/lib/mail/fields/unstructured_field.rb
@@ -99,31 +99,35 @@ def wrap_lines(name, folded_lines)
     end
 
     def fold(prepend = 0) # :nodoc:
+      #  prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
       encoding       = normalized_encoding
+      encoding_overhead = "=?#{encoding}?Q??=".length
+      # The encoded string goes here      ^ (between the ??)
+      max_safe_word = 78 - encoding_overhead - 10 # allow for encoding overhead + prefix
       decoded_string = decoded.to_s
-      should_encode  = !decoded_string.ascii_only?
+      words = decoded_string.split(/[ \t]/)
+      should_encode  = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
       if should_encode
+        max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
         first = true
-        words = decoded_string.split(/[ \t]/).map do |word|
+        words = words.map do |word|
           if first
             first = !first
           else
             word = " #{word}"
           end
-          if !word.ascii_only?
-            word
+          if word.ascii_only?
+            word.scan(max_safe_re)
           else
             word.scan(/.{7}|.+$/)
           end
         end.flatten
-      else
-        words = decoded_string.split(/[ \t]/)
       end
 
       folded_lines   = []
       while !words.empty?
         limit = 78 - prepend
-        limit = limit - 7 - encoding.length if should_encode
+        limit = limit - encoding_overhead if should_encode
         line = String.new
         first_word = true
         while !words.empty?
diff --git a/spec/mail/encodings_spec.rb b/spec/mail/encodings_spec.rb
index 7428b9383..4c74d6e17 100644
--- a/spec/mail/encodings_spec.rb
+++ b/spec/mail/encodings_spec.rb
@@ -283,18 +283,20 @@
       expect(Mail::Encodings.value_decode(string)).to eq result
     end
 
-    it "should not fold a long string that has no spaces" do
+    it "should fold a long non-ascii string that has no spaces" do
       original = "ВосстановлениеВосстановлениеВашегопароля"
       if original.respond_to?(:force_encoding)
         original = original.dup.force_encoding('UTF-8')
-        result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
-      else
-        result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
       end
+      result = "Subject: =?UTF-8?Q?=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=BE=D1=81=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=D0=92=D0=B0=D1=88=D0=B5=D0=B3=D0=BE=D0=BF=D0=B0=D1=80=D0=BE=D0=BB=D1=8F?=\r\n"
       mail = Mail.new
       mail.subject = original
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result
     end
 
     it "should round trip a complex string properly" do
@@ -306,13 +308,27 @@
       mail = Mail.new
       mail.subject = original
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
+
       mail = Mail.new(mail.encoded)
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
+
       mail = Mail.new(mail.encoded)
       expect(mail[:subject].decoded).to eq original
-      expect(mail[:subject].encoded).to eq result
+      encoded = mail[:subject].encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
     end
 
     it "should round trip another complex string (koi-8)" do
diff --git a/spec/mail/fields/unstructured_field_spec.rb b/spec/mail/fields/unstructured_field_spec.rb
index af3407dad..a4434d377 100644
--- a/spec/mail/fields/unstructured_field_spec.rb
+++ b/spec/mail/fields/unstructured_field_spec.rb
@@ -149,8 +149,8 @@
       @field = Mail::UnstructuredField.new("Subject", string)
       string = string.dup.force_encoding('UTF-8')
       result = "Subject: =?UTF-8?Q?This_is_=E3=81=82_really_long_string_This_is_=E3=81=82?=\r\n\s=?UTF-8?Q?_really_long_string_This_is_=E3=81=82_really_long_string_This_is?=\r\n\s=?UTF-8?Q?_=E3=81=82_really_long_string_This_is_=E3=81=82_really_long?=\r\n\s=?UTF-8?Q?_string?=\r\n"
-      expect(@field.encoded).to eq result
       expect(@field.decoded).to eq string
+      expect(@field.encoded).to eq result
     end
 
     it "should fold properly with my actual complicated header" do
@@ -158,16 +158,53 @@
       @field = Mail::UnstructuredField.new("X-SMTPAPI", string)
       string = string.dup.force_encoding('UTF-8')
       result = "X-SMTPAPI: =?UTF-8?Q?{=22unique=5Fargs=22:_{=22mailing=5Fid=22:147,=22a?=\r\n =?UTF-8?Q?ccount=5Fid=22:2},_=22to=22:_[=22larspind@gmail.com=22],_=22categ?=\r\n =?UTF-8?Q?ory=22:_=22mailing=22,_=22filters=22:_{=22domainkeys=22:_{=22sett?=\r\n =?UTF-8?Q?ings=22:_{=22domain=22:1,=22enable=22:1}}},_=22sub=22:_{=22{{op?=\r\n =?UTF-8?Q?en=5Fimage=5Furl}}=22:_[=22http://betaling.larspind.local/O?=\r\n =?UTF-8?Q?/token/147/Mailing::FakeRecipient=22],_=22{{name}}=22:_[=22[FIRST?=\r\n =?UTF-8?Q?_NAME]=22],_=22{{signup=5Freminder}}=22:_[=22=28her_kommer_til_at?=\r\n =?UTF-8?Q?_st=C3=A5_hvorn=C3=A5r_folk_har_skrevet_sig_op_...=29=22],?=\r\n =?UTF-8?Q?_=22{{unsubscribe=5Furl}}=22:_[=22http://betaling.larspind.?=\r\n =?UTF-8?Q?local/U/token/147/Mailing::FakeRecipient=22],_=22{{email}}=22:?=\r\n =?UTF-8?Q?_[=22larspind@gmail.com=22],_=22{{link:308}}=22:_[=22http://beta?=\r\n =?UTF-8?Q?ling.larspind.local/L/308/0/Mailing::FakeRecipient=22],_=22{{con?=\r\n =?UTF-8?Q?firm=5Furl}}=22:_[=22=22],_=22{{ref}}=22:_[=22[REF]=22]}}?=\r\n"
-      expect(@field.encoded).to eq result
       expect(@field.decoded).to eq string
+      encoded = @field.encoded
+      # check lines are not too long
+      encoded.split("\r\n").each { |line| expect(line.length).to be <= 78 }
+      # check expected contents, ignoring folding
+      expect(encoded.gsub("?=\r\n =?UTF-8?Q?", '')).to eq result.gsub("?=\r\n =?UTF-8?Q?", '')
     end
 
     it "should fold properly with continuous spaces around the linebreak" do
-      @field = Mail::UnstructuredField.new("Subject", "This is a header that has continuous spaces around line break point,     which should be folded properly")
+      subject = "This is a header that has continuous spaces around line break point,     which should be folded properly"
+      @field = Mail::UnstructuredField.new("Subject", subject)
       result = "Subject: This is a header that has continuous spaces around line break point,\s\r\n\s\s\s\swhich should be folded properly\r\n"
+      expect(@field.decoded).to eq subject
       expect(@field.encoded).to eq result
     end
 
+    it "should fold an ASCII-only subject with more than 78 characters and no white space" do
+      value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
+      expect(value.length).to be > 78 - "Subject: ".length
+      @field = Mail::UnstructuredField.new("Subject", value)
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
+    it "should fold an ASCII-only subject with more than 998 characters and no white space" do
+      value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
+      @field = Mail::UnstructuredField.new("Subject", value)
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
+    end
+
+    # TODO: tweak unstructured_field to always generate lines of at most 78 chars
+    it "should fold a Japanese subject with more than 998 characters long and no white space" do
+      value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
+      @field = Mail::UnstructuredField.new("Subject", value)
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 90 }
+    end
+
+    # TODO: tweak unstructured_field to always generate lines of at most 78 chars
+    it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
+      value = "😄" * 90
+      @field = Mail::UnstructuredField.new("Subject", value)
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be < 110 }
+    end
+
   end
 
   describe "encoding non QP safe chars" do

From e000cb2d0770793920e276a0826b0948665d85b6 Mon Sep 17 00:00:00 2001
From: Sebb <sebb@apache.org>
Date: Wed, 22 Feb 2023 15:59:28 +0000
Subject: [PATCH 2/4] Add some comment

[skip CI]
---
 lib/mail/fields/unstructured_field.rb | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
index bef3a9d34..9095c0dcb 100644
--- a/lib/mail/fields/unstructured_field.rb
+++ b/lib/mail/fields/unstructured_field.rb
@@ -98,6 +98,14 @@ def wrap_lines(name, folded_lines)
       result.join("\r\n\s")
     end
 
+    # folding strategy:
+    # The line is split into words separated by space or tab.
+    # If the line contains any non-ascii characters, or any words are expected to be too long to fit,
+    # then they are additionally split, and the line is marked for encoding
+    # 
+    # The list of individual words is then used to fill up the output lines without overflowing.
+    # This is not always guaranteed to work, because there is a wide variation in the number of
+    # characters that are needed to encode a given character.
     def fold(prepend = 0) # :nodoc:
       #  prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
       encoding       = normalized_encoding
@@ -144,9 +152,6 @@ def fold(prepend = 0) # :nodoc:
           word = encode_crlf(word)
           # Skip to next line if we're going to go past the limit
           # Unless this is the first word, in which case we're going to add it anyway
-          # Note: This means that a word that's longer than 998 characters is going to break the spec. Please fix if this is a problem for you.
-          # (The fix, it seems, would be to use encoded-word encoding on it, because that way you can break it across multiple lines and
-          # the linebreak will be ignored)
           break if !line.empty? && (line.length + word.length + 1 > limit)
           # Remove the word from the queue ...
           words.shift
@@ -160,7 +165,7 @@ def fold(prepend = 0) # :nodoc:
           # ... add it in encoded form to the current line
           line << word
         end
-        # Encode the line if necessary
+        # Mark the line as encoded if necessary
         line = "=?#{encoding}?Q?#{line}?=" if should_encode
         # Add the line to the output and reset the prepend
         folded_lines << line

From 382590a6da2eca75c68e35fc07667d205b549629 Mon Sep 17 00:00:00 2001
From: Sebb <sebb@apache.org>
Date: Wed, 22 Feb 2023 22:31:52 +0000
Subject: [PATCH 3/4] Re-work overlong lines

---
 lib/mail/fields/unstructured_field.rb       | 25 ++++++++++++++++-----
 spec/mail/fields/unstructured_field_spec.rb | 16 +++++++++----
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
index 9095c0dcb..537d4d637 100644
--- a/lib/mail/fields/unstructured_field.rb
+++ b/lib/mail/fields/unstructured_field.rb
@@ -105,7 +105,8 @@ def wrap_lines(name, folded_lines)
     # 
     # The list of individual words is then used to fill up the output lines without overflowing.
     # This is not always guaranteed to work, because there is a wide variation in the number of
-    # characters that are needed to encode a given character.
+    # characters that are needed to encode a given character. If the resulting line would be too
+    # long, divide the original word into two chunks and add the pieces separately.
     def fold(prepend = 0) # :nodoc:
       #  prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
       encoding       = normalized_encoding
@@ -115,6 +116,7 @@ def fold(prepend = 0) # :nodoc:
       decoded_string = decoded.to_s
       words = decoded_string.split(/[ \t]/)
       should_encode  = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
+      encoding_overhead = 0 unless should_encode
       if should_encode
         max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
         first = true
@@ -134,13 +136,14 @@ def fold(prepend = 0) # :nodoc:
 
       folded_lines   = []
       while !words.empty?
-        limit = 78 - prepend
-        limit = limit - encoding_overhead if should_encode
+        limit = 78 - prepend - encoding_overhead
         line = String.new
         first_word = true
         while !words.empty?
           break unless word = words.first.dup
 
+          original_word = word # in case we need to try again
+
           # Convert on 1.9+ only since we aren't sure of the current
           # charset encoding on 1.8. We'd need to track internal/external
           # charset on each field.
@@ -152,17 +155,29 @@ def fold(prepend = 0) # :nodoc:
           word = encode_crlf(word)
           # Skip to next line if we're going to go past the limit
           # Unless this is the first word, in which case we're going to add it anyway
-          break if !line.empty? && (line.length + word.length + 1 > limit)
+          break if !line.empty? && (line.length + word.length >= limit)
           # Remove the word from the queue ...
           words.shift
           # Add word separator
           if first_word
             first_word = false
           else
-            line << " " if !should_encode
+            line << " " unless should_encode
           end
 
           # ... add it in encoded form to the current line
+          #  but first check if we have overflowed
+          # should only happen for the first word on a line
+          if should_encode && (line.length + word.length > limit)
+            word, remain = original_word.scan(/.{3}|.+$/) # roughly half the original split
+            words.unshift remain # put the unused bit back
+            # re-encode shorter word
+            if charset && word.respond_to?(:encoding)
+              word = Encodings.transcode_charset(word, word.encoding, charset)
+            end
+            word = encode(word)
+            word = encode_crlf(word)  
+          end
           line << word
         end
         # Mark the line as encoded if necessary
diff --git a/spec/mail/fields/unstructured_field_spec.rb b/spec/mail/fields/unstructured_field_spec.rb
index a4434d377..1fddab608 100644
--- a/spec/mail/fields/unstructured_field_spec.rb
+++ b/spec/mail/fields/unstructured_field_spec.rb
@@ -178,6 +178,7 @@
       value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
       expect(value.length).to be > 78 - "Subject: ".length
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
       lines.each { |line| expect(line.length).to be <= 78 }
     end
@@ -185,6 +186,7 @@
     it "should fold an ASCII-only subject with more than 998 characters and no white space" do
       value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
       lines.each { |line| expect(line.length).to be <= 78 }
     end
@@ -193,16 +195,18 @@
     it "should fold a Japanese subject with more than 998 characters long and no white space" do
       value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
-      lines.each { |line| expect(line.length).to be <= 90 }
+      lines.each { |line| expect(line.length).to be <= 78 }
     end
 
     # TODO: tweak unstructured_field to always generate lines of at most 78 chars
     it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
       value = "😄" * 90
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
-      lines.each { |line| expect(line.length).to be < 110 }
+      lines.each { |line| expect(line.length).to be <= 78 }
     end
 
   end
@@ -216,9 +220,13 @@
   end
 
   describe "iso-2022-jp Subject" do
-    it "should encoded with ISO-2022-JP encoding" do
-      @field = Mail::UnstructuredField.new("Subject", "あいうえお")
+    it "should be encoded with ISO-2022-JP encoding" do
+      value = "あいうえお"
+      @field = Mail::UnstructuredField.new("Subject", value)
       @field.charset = 'iso-2022-jp'
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
       expect(@field.encoded).to eq "Subject: =?ISO-2022-JP?Q?=1B$B$=22$$$&$=28$*=1B=28B?=\r\n"
     end
   end

From 7a7b8e729313b5a622fbf7b7ce8173debf5e8e8d Mon Sep 17 00:00:00 2001
From: sebbASF <sebbASF@users.noreply.github.com>
Date: Thu, 23 Feb 2023 08:35:53 +0000
Subject: [PATCH 4/4] Use actual prefix length

---
 lib/mail/fields/unstructured_field.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
index 537d4d637..cb578533b 100644
--- a/lib/mail/fields/unstructured_field.rb
+++ b/lib/mail/fields/unstructured_field.rb
@@ -112,7 +112,7 @@ def fold(prepend = 0) # :nodoc:
       encoding       = normalized_encoding
       encoding_overhead = "=?#{encoding}?Q??=".length
       # The encoded string goes here      ^ (between the ??)
-      max_safe_word = 78 - encoding_overhead - 10 # allow for encoding overhead + prefix
+      max_safe_word = 78 - encoding_overhead - prepend # allow for encoding overhead + prefix
       decoded_string = decoded.to_s
       words = decoded_string.split(/[ \t]/)
       should_encode  = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}