1
1
#![ unstable( issue = "0" , feature = "windows_stdio" ) ]
2
2
3
3
use cell:: Cell ;
4
+ use char:: decode_utf16;
4
5
use cmp;
5
6
use io;
6
7
use ptr;
@@ -64,22 +65,27 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
64
65
//
65
66
// If the data is not valid UTF-8 we write out as many bytes as are valid.
66
67
// Only when there are no valid bytes (which will happen on the next call), return an error.
67
- let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE ) ;
68
+ let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE / 2 ) ;
68
69
let utf8 = match str:: from_utf8 ( & data[ ..len] ) {
69
70
Ok ( s) => s,
70
71
Err ( ref e) if e. valid_up_to ( ) == 0 => {
71
72
return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidData ,
72
- "Windows stdio in console mode does not support non-UTF-8 byte sequences; \
73
- see https://github.com/rust-lang/rust/issues/23344") )
73
+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ) )
74
74
} ,
75
75
Err ( e) => str:: from_utf8 ( & data[ ..e. valid_up_to ( ) ] ) . unwrap ( ) ,
76
76
} ;
77
- let utf16 = utf8. encode_utf16 ( ) . collect :: < Vec < u16 > > ( ) ;
77
+ let mut utf16 = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
78
+ let mut len_utf16 = 0 ;
79
+ for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
80
+ * dest = chr;
81
+ len_utf16 += 1 ;
82
+ }
83
+ let utf16 = & utf16[ ..len_utf16] ;
78
84
79
85
let mut written = write_u16s ( handle, & utf16) ?;
80
86
81
87
// Figure out how many bytes of UTF-8 were written away as UTF-16.
82
- if written > = utf16. len ( ) {
88
+ if written = = utf16. len ( ) {
83
89
Ok ( utf8. len ( ) )
84
90
} else {
85
91
// Make sure we didn't end up writing only half of a surrogate pair (even though the chance
@@ -90,7 +96,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
90
96
let first_char_remaining = utf16[ written] ;
91
97
if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate
92
98
// We just hope this works, and give up otherwise
93
- let _ = write_u16s ( handle, & utf16[ written..written] ) ;
99
+ let _ = write_u16s ( handle, & utf16[ written..written+ 1 ] ) ;
94
100
written += 1 ;
95
101
}
96
102
// Calculate the number of bytes of `utf8` that were actually written.
@@ -103,6 +109,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
103
109
_ => 3 ,
104
110
} ;
105
111
}
112
+ debug_assert ! ( String :: from_utf16( & utf16[ ..written] ) . unwrap( ) == utf8[ ..count] ) ;
106
113
Ok ( count)
107
114
}
108
115
}
@@ -137,7 +144,7 @@ impl Stdin {
137
144
return Ok ( 0 ) ;
138
145
} else if buf. len ( ) < 4 {
139
146
return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidInput ,
140
- "Windows stdin in console mode does not support a buffer too small to; \
147
+ "Windows stdin in console mode does not support a buffer too small to \
141
148
guarantee holding one arbitrary UTF-8 character (4 bytes)") )
142
149
}
143
150
@@ -147,27 +154,14 @@ impl Stdin {
147
154
// lost.
148
155
let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
149
156
let read = self . read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount) ?;
150
- let utf16 = & utf16_buf[ ..read] ;
151
157
152
- // FIXME: it would be nice if we could directly decode into the buffer instead of doing an
153
- // allocation.
154
- let data = match String :: from_utf16 ( & utf16) {
155
- Ok ( utf8) => utf8. into_bytes ( ) ,
156
- Err ( ..) => {
157
- // We can't really do any better than forget all data and return an error.
158
- return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidData ,
159
- "Windows stdin in console mode does not support non-UTF-16 input; \
160
- encountered unpaired surrogate") )
161
- } ,
162
- } ;
163
- buf. copy_from_slice ( & data) ;
164
- Ok ( data. len ( ) )
158
+ utf16_to_utf8 ( & utf16_buf[ ..read] , buf)
165
159
}
166
160
167
161
// We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
168
162
// buffer size, and keep it around for the next read hoping to put them together.
169
163
// This is a best effort, and may not work if we are not the only reader on Stdin.
170
- pub fn read_u16s_fixup_surrogates ( & self , handle : c:: HANDLE , buf : & mut [ u16 ] , mut amount : usize )
164
+ fn read_u16s_fixup_surrogates ( & self , handle : c:: HANDLE , buf : & mut [ u16 ] , mut amount : usize )
171
165
-> io:: Result < usize >
172
166
{
173
167
// Insert possibly remaining unpaired surrogate from last read.
@@ -223,6 +217,26 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
223
217
Ok ( amount as usize )
224
218
}
225
219
220
/// Transcodes the UTF-16 code units in `utf16` into UTF-8 bytes stored at the start of `utf8`.
///
/// Returns the number of bytes written to `utf8`.
///
/// # Errors
///
/// * `io::ErrorKind::InvalidData` if `utf16` contains an unpaired surrogate.
/// * `io::ErrorKind::InvalidInput` if `utf8` is too small to hold the encoded output.
///
/// In both error cases `utf8` may already contain a partially written prefix, and no count is
/// reported for it.
// NOTE(review): the reason for `allow(unused)` is not visible from this function alone; it is
// kept as-is so that builds which do not reference this helper stay warning-free.
#[allow(unused)]
fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
    let mut written = 0;
    for chr in decode_utf16(utf16.iter().cloned()) {
        match chr {
            Ok(chr) => {
                let needed = chr.len_utf8();
                // `char::encode_utf8` panics when the destination is too short. Check the
                // remaining space first so an undersized buffer is reported as an error
                // instead of aborting the caller. `written <= utf8.len()` always holds here,
                // so the subtraction cannot underflow.
                if utf8.len() - written < needed {
                    return Err(io::Error::new(io::ErrorKind::InvalidInput,
                        "Windows stdin in console mode: buffer too small to hold the \
                        UTF-8 encoding of the UTF-16 input"))
                }
                chr.encode_utf8(&mut utf8[written..]);
                written += needed;
            }
            Err(_) => {
                // We can't really do any better than forget all data and return an error.
                return Err(io::Error::new(io::ErrorKind::InvalidData,
                    "Windows stdin in console mode does not support non-UTF-16 input; \
                    encountered unpaired surrogate"))
            }
        }
    }
    Ok(written)
}
239
+
226
240
impl Stdout {
227
241
pub fn new ( ) -> io:: Result < Stdout > {
228
242
Ok ( Stdout )
0 commit comments