@ -397,33 +397,11 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
let mut buf_out = [ 0 u8 ; BUF_SIZE ] ;
let mut buf_out = [ 0 u8 ; BUF_SIZE ] ;
let mut builder = RopeBuilder ::new ( ) ;
let mut builder = RopeBuilder ::new ( ) ;
// By default, the encoding of the text is auto-detected by
let ( encoding , has_bom , mut decoder , read ) =
// `encoding_rs` for_bom, and if it fails, from `chardetng`
read_and_detect_encoding ( reader , encoding , & mut buf ) ? ;
// crate which requires sample data from the reader.
// As a manual override to this auto-detection is possible, the
// same data is read into `buf` to ensure symmetry in the upcoming
// loop.
let ( encoding , has_bom , mut decoder , mut slice , mut is_empty ) = {
let read = reader . read ( & mut buf ) ? ;
let is_empty = read = = 0 ;
let ( encoding , has_bom ) = encoding
. map ( | encoding | ( encoding , false ) )
. or_else ( | | {
encoding ::Encoding ::for_bom ( & buf ) . map ( | ( encoding , _bom_size ) | ( encoding , true ) )
} )
. unwrap_or_else ( | | {
let mut encoding_detector = chardetng ::EncodingDetector ::new ( ) ;
encoding_detector . feed ( & buf , is_empty ) ;
( encoding_detector . guess ( None , true ) , false )
} ) ;
let decoder = encoding . new_decoder ( ) ;
// If the amount of bytes read from the reader is less than
let mut slice = & buf [ .. read ] ;
// `buf.len()`, it is undesirable to read the bytes afterwards.
let mut is_empty = read = = 0 ;
let slice = & buf [ .. read ] ;
( encoding , has_bom , decoder , slice , is_empty )
} ;
// `RopeBuilder::append()` expects a `&str`, so this is the "real"
// `RopeBuilder::append()` expects a `&str`, so this is the "real"
// output buffer. When decoding, the number of bytes in the output
// output buffer. When decoding, the number of bytes in the output
@ -493,6 +471,81 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
Ok ( ( rope , encoding , has_bom ) )
Ok ( ( rope , encoding , has_bom ) )
}
}
pub fn read_to_string < R : std ::io ::Read + ? Sized > (
reader : & mut R ,
encoding : Option < & ' static Encoding > ,
) -> Result < ( String , & ' static Encoding , bool ) , Error > {
let mut buf = [ 0 u8 ; BUF_SIZE ] ;
let ( encoding , has_bom , mut decoder , read ) =
read_and_detect_encoding ( reader , encoding , & mut buf ) ? ;
let mut slice = & buf [ .. read ] ;
let mut is_empty = read = = 0 ;
let mut buf_string = String ::with_capacity ( buf . len ( ) ) ;
loop {
let mut total_read = 0 usize ;
loop {
let ( result , read , .. ) =
decoder . decode_to_string ( & slice [ total_read .. ] , & mut buf_string , is_empty ) ;
total_read + = read ;
match result {
encoding ::CoderResult ::InputEmpty = > {
debug_assert_eq! ( slice . len ( ) , total_read ) ;
break ;
}
encoding ::CoderResult ::OutputFull = > {
debug_assert! ( slice . len ( ) > total_read ) ;
buf_string . reserve ( buf . len ( ) )
}
}
}
if is_empty {
debug_assert_eq! ( reader . read ( & mut buf ) ? , 0 ) ;
break ;
}
let read = reader . read ( & mut buf ) ? ;
slice = & buf [ .. read ] ;
is_empty = read = = 0 ;
}
Ok ( ( buf_string , encoding , has_bom ) )
}
/// Reads the first chunk from a Reader into the given buffer
/// and detects the encoding.
///
/// By default, the encoding of the text is auto-detected by
/// `encoding_rs` for_bom, and if it fails, from `chardetng`
/// crate which requires sample data from the reader.
/// As a manual override to this auto-detection is possible, the
/// same data is read into `buf` to ensure symmetry in the upcoming
/// loop.
fn read_and_detect_encoding < R : std ::io ::Read + ? Sized > (
reader : & mut R ,
encoding : Option < & ' static Encoding > ,
buf : & mut [ u8 ] ,
) -> Result < ( & ' static Encoding , bool , encoding ::Decoder , usize ) , Error > {
let read = reader . read ( buf ) ? ;
let is_empty = read = = 0 ;
let ( encoding , has_bom ) = encoding
. map ( | encoding | ( encoding , false ) )
. or_else ( | | encoding ::Encoding ::for_bom ( buf ) . map ( | ( encoding , _bom_size ) | ( encoding , true ) ) )
. unwrap_or_else ( | | {
let mut encoding_detector = chardetng ::EncodingDetector ::new ( ) ;
encoding_detector . feed ( buf , is_empty ) ;
( encoding_detector . guess ( None , true ) , false )
} ) ;
let decoder = encoding . new_decoder ( ) ;
Ok ( ( encoding , has_bom , decoder , read ) )
}
// The documentation and implementation of this function should be up-to-date with
// The documentation and implementation of this function should be up-to-date with
// its sibling function, `from_reader()`.
// its sibling function, `from_reader()`.
//
//