Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions Sources/Foundation/NSCFString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,25 @@ internal func _CFSwiftStringGetBytes(_ str: AnyObject, encoding: CFStringEncodin
// TODO: Don't treat many encodings like they are UTF8
case CFStringEncoding(kCFStringEncodingUTF8), CFStringEncoding(kCFStringEncodingISOLatin1), CFStringEncoding(kCFStringEncodingMacRoman), CFStringEncoding(kCFStringEncodingASCII), CFStringEncoding(kCFStringEncodingNonLossyASCII):
let encodingView = (str as! NSString).substring(with: NSRange(range)).utf8
var converted = 0
if let buffer = buffer {
for (idx, character) in encodingView.enumerated() {
if idx >= maxBufLen { break }
if encoding == CFStringEncoding(kCFStringEncodingASCII) && !Unicode.ASCII.isASCII(character) { break }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line fixes the unit test failure. The issue was that this fix exposes the bug mentioned above on line 154:

// TODO: Don't treat many encodings like they are UTF8

Currently if the requested encoding is UTF8/ISOLatin1/MacRoman/ASCII/non-lossy ASCII we just encode UTF-8. Previously, we incorrectly reported the convertedLength as the UTF-8 length, but this fix updated that to correctly report the UTF-16 length processed. In plist encoding, when encoding a string the CoreFoundation code attempts getting ASCII bytes and if that fails it gets UTF-16 bytes encoded instead. Previously, it believed that the ASCII conversion failed because the reported UTF-8 length mismatched the known UTF-16 length. Now that the return value is correctly reporting the converted length, it began "passing" and thinking that it could encode the non-ASCII string as ASCII instead of falling over to UTF-16.

As a stopgap, this fixes the ASCII encoding by failing if we hit a non-ASCII character. We also need to do this same behavior (as the comment suggests) for the other non-UTF8 encodings here (and/or actually encode them properly 🙂). That's a larger change that I'm proposing we do as a followup since this is effectively already broken for the other encodings and I don't think it makes it worse (just perhaps more discoverable) and the other encodings are likely far less common than ASCII.

buffer.advanced(by: idx).initialize(to: character)
converted += 1
}
}
usedBufLen?.pointee = encodingView.count
convertedLength = encodingView.count
usedBufLen?.pointee = converted
convertedLength = converted

case CFStringEncoding(kCFStringEncodingUTF16):
let encodingView = (str as! NSString)._swiftObject.utf16
let start = encodingView.startIndex
var converted = 0
if let buffer = buffer {
for idx in 0..<range.length {
if idx * 2 >= maxBufLen { break }
// Since character is 2 bytes but the buffer is in term of 1 byte values, we have to split it up
let character = encodingView[encodingView.index(start, offsetBy: idx + range.location)]
#if _endian(big)
Expand All @@ -178,11 +184,12 @@ internal func _CFSwiftStringGetBytes(_ str: AnyObject, encoding: CFStringEncodin
#endif
buffer.advanced(by: idx * 2).initialize(to: byte0)
buffer.advanced(by: (idx * 2) + 1).initialize(to: byte1)
converted += 1
}
}
// Every character was 2 bytes
usedBufLen?.pointee = range.length * 2
convertedLength = range.length
usedBufLen?.pointee = converted * 2
convertedLength = converted

default:
fatalError("Attempted to get bytes of a Swift string using an unsupported encoding")
Expand Down
31 changes: 31 additions & 0 deletions Tests/Foundation/TestPropertyListSerialization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//

import CoreFoundation

class TestPropertyListSerialization : XCTestCase {
func test_BasicConstruction() {
let dict = NSMutableDictionary(capacity: 0)
Expand Down Expand Up @@ -80,4 +82,33 @@ class TestPropertyListSerialization : XCTestCase {
XCTAssertEqual(nserror.userInfo[NSDebugDescriptionErrorKey] as? String, "Cannot parse a NULL or zero-length data")
}
}

func test_decodeOverflowUnicodeString() throws {
var native = "ФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФФ"
let ns = native.withCString {
NSString(utf8String: $0)!
}
do {
var buffer = Array<UInt8>(repeating: 1, count: native.utf8.count)
buffer.withUnsafeMutableBufferPointer { bufferPtr in
var used: CFIndex = 0
CFStringGetBytes(unsafeBitCast(ns, to: CFString.self), CFRangeMake(0, ns.length), CFStringBuiltInEncodings.UTF8.rawValue, 0xF, false, bufferPtr.baseAddress!, 10, &used)
XCTAssertEqual(used, 10)
}
for i in 10 ..< buffer.count {
XCTAssertEqual(buffer[i], 1)
}
}
do {
var buffer = Array<UInt16>(repeating: 1, count: native.utf8.count)
buffer.withUnsafeMutableBufferPointer { bufferPtr in
var used: CFIndex = 0
CFStringGetBytes(unsafeBitCast(ns, to: CFString.self), CFRangeMake(0, ns.length), CFStringBuiltInEncodings.UTF16.rawValue, 0xF, false, bufferPtr.baseAddress!, 10, &used)
XCTAssertEqual(used, 10)
}
for i in 10 ..< buffer.count {
XCTAssertEqual(buffer[i], 1)
}
}
}
}