@@ -169,14 +169,27 @@ fn write(
169
169
}
170
170
171
171
fn write_valid_utf8_to_console ( handle : c:: HANDLE , utf8 : & str ) -> io:: Result < usize > {
172
+ debug_assert ! ( !utf8. is_empty( ) ) ;
173
+
172
174
let mut utf16 = [ MaybeUninit :: < u16 > :: uninit ( ) ; MAX_BUFFER_SIZE / 2 ] ;
173
- let mut len_utf16 = 0 ;
174
- for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
175
- * dest = MaybeUninit :: new ( chr) ;
176
- len_utf16 += 1 ;
177
- }
178
- // Safety: We've initialized `len_utf16` values.
179
- let utf16: & [ u16 ] = unsafe { MaybeUninit :: slice_assume_init_ref ( & utf16[ ..len_utf16] ) } ;
175
+ let utf8 = & utf8[ ..utf8. floor_char_boundary ( utf16. len ( ) ) ] ;
176
+
177
+ let utf16: & [ u16 ] = unsafe {
178
+ // Note that this theoretically checks validity twice in the (most common) case
179
+ // where the underlying byte sequence is valid utf-8 (given the check in `write()`).
180
+ let result = c:: MultiByteToWideChar (
181
+ c:: CP_UTF8 , // CodePage
182
+ c:: MB_ERR_INVALID_CHARS , // dwFlags
183
+ utf8. as_ptr ( ) as c:: LPCCH , // lpMultiByteStr
184
+ utf8. len ( ) as c:: c_int , // cbMultiByte
185
+ utf16. as_mut_ptr ( ) as c:: LPWSTR , // lpWideCharStr
186
+ utf16. len ( ) as c:: c_int , // cchWideChar
187
+ ) ;
188
+ assert ! ( result != 0 , "Unexpected error in MultiByteToWideChar" ) ;
189
+
190
+ // Safety: MultiByteToWideChar initializes `result` values.
191
+ MaybeUninit :: slice_assume_init_ref ( & utf16[ ..result as usize ] )
192
+ } ;
180
193
181
194
let mut written = write_u16s ( handle, & utf16) ?;
182
195
@@ -189,8 +202,8 @@ fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usiz
189
202
// a missing surrogate can be produced (and also because of the UTF-8 validation above),
190
203
// write the missing surrogate out now.
191
204
// Buffering it would mean we have to lie about the number of bytes written.
192
- let first_char_remaining = utf16[ written] ;
193
- if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF {
205
+ let first_code_unit_remaining = utf16[ written] ;
206
+ if first_code_unit_remaining >= 0xDCEE && first_code_unit_remaining <= 0xDFFF {
194
207
// low surrogate
195
208
// We just hope this works, and give up otherwise
196
209
let _ = write_u16s ( handle, & utf16[ written..written + 1 ] ) ;
@@ -212,6 +225,7 @@ fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usiz
212
225
}
213
226
214
227
fn write_u16s ( handle : c:: HANDLE , data : & [ u16 ] ) -> io:: Result < usize > {
228
+ debug_assert ! ( data. len( ) < u32 :: MAX as usize ) ;
215
229
let mut written = 0 ;
216
230
cvt ( unsafe {
217
231
c:: WriteConsoleW (
@@ -365,26 +379,32 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit<u16>]) -> io::Result<usiz
365
379
Ok ( amount as usize )
366
380
}
367
381
368
- #[ allow( unused) ]
369
382
fn utf16_to_utf8 ( utf16 : & [ u16 ] , utf8 : & mut [ u8 ] ) -> io:: Result < usize > {
370
- let mut written = 0 ;
371
- for chr in char:: decode_utf16 ( utf16. iter ( ) . cloned ( ) ) {
372
- match chr {
373
- Ok ( chr) => {
374
- chr. encode_utf8 ( & mut utf8[ written..] ) ;
375
- written += chr. len_utf8 ( ) ;
376
- }
377
- Err ( _) => {
378
- // We can't really do any better than forget all data and return an error.
379
- return Err ( io:: const_io_error!(
380
- io:: ErrorKind :: InvalidData ,
381
- "Windows stdin in console mode does not support non-UTF-16 input; \
382
- encountered unpaired surrogate",
383
- ) ) ;
384
- }
385
- }
383
+ debug_assert ! ( utf16. len( ) <= c:: c_int:: MAX as usize ) ;
384
+ debug_assert ! ( utf8. len( ) <= c:: c_int:: MAX as usize ) ;
385
+
386
+ let result = unsafe {
387
+ c:: WideCharToMultiByte (
388
+ c:: CP_UTF8 , // CodePage
389
+ c:: WC_ERR_INVALID_CHARS , // dwFlags
390
+ utf16. as_ptr ( ) , // lpWideCharStr
391
+ utf16. len ( ) as c:: c_int , // cchWideChar
392
+ utf8. as_mut_ptr ( ) as c:: LPSTR , // lpMultiByteStr
393
+ utf8. len ( ) as c:: c_int , // cbMultiByte
394
+ ptr:: null ( ) , // lpDefaultChar
395
+ ptr:: null_mut ( ) , // lpUsedDefaultChar
396
+ )
397
+ } ;
398
+ if result == 0 {
399
+ // We can't really do any better than forget all data and return an error.
400
+ Err ( io:: const_io_error!(
401
+ io:: ErrorKind :: InvalidData ,
402
+ "Windows stdin in console mode does not support non-UTF-16 input; \
403
+ encountered unpaired surrogate",
404
+ ) )
405
+ } else {
406
+ Ok ( result as usize )
386
407
}
387
- Ok ( written)
388
408
}
389
409
390
410
impl IncompleteUtf8 {
0 commit comments