@@ -20,15 +20,23 @@ use std::os::unix::fs::MetadataExt;
2020#[ cfg( target_os = "windows" ) ]
2121use std:: os:: windows:: fs:: MetadataExt ;
2222
23+ /// Integer type for the --bytes limit; with the 128-bit feature, really large limits can be expressed, like 1Y.
24+ #[ cfg( not( feature = "cmp_bytes_limit_128_bit" ) ) ]
25+ pub type Bytes = u64 ;
26+ #[ cfg( feature = "cmp_bytes_limit_128_bit" ) ]
27+ pub type Bytes = u128 ;
28+ // The --ignore-initial value is currently limited to u64, as take(skip) is used.
29+ pub type IgnInit = u64 ;
30+
2331#[ derive( Clone , Debug , Default , Eq , PartialEq ) ]
2432pub struct Params {
2533 executable : OsString ,
2634 from : OsString ,
2735 to : OsString ,
2836 print_bytes : bool ,
29- skip_a : Option < usize > ,
30- skip_b : Option < usize > ,
31- max_bytes : Option < usize > ,
37+ skip_a : Option < IgnInit > ,
38+ skip_b : Option < IgnInit > ,
39+ max_bytes : Option < Bytes > ,
3240 verbose : bool ,
3341 quiet : bool ,
3442}
@@ -66,13 +74,13 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
6674 } ;
6775 let executable_str = executable. to_string_lossy ( ) . to_string ( ) ;
6876
69- let parse_skip = |param : & str , skip_desc : & str | -> Result < usize , String > {
77+ let parse_skip = |param : & str , skip_desc : & str | -> Result < IgnInit , String > {
7078 let suffix_start = param
7179 . find ( |b : char | !b. is_ascii_digit ( ) )
7280 . unwrap_or ( param. len ( ) ) ;
73- let mut num = match param[ ..suffix_start] . parse :: < usize > ( ) {
81+ let mut num = match param[ ..suffix_start] . parse :: < IgnInit > ( ) {
7482 Ok ( num) => num,
75- Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => usize :: MAX ,
83+ Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => IgnInit :: MAX ,
7684 Err ( _) => {
7785 return Err ( format ! (
7886 "{executable_str}: invalid --ignore-initial value '{skip_desc}'"
@@ -83,7 +91,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
8391 if suffix_start != param. len ( ) {
8492 // Note that GNU cmp advertises supporting up to Y, but fails if you try
8593 // to actually use anything beyond E.
86- let multiplier: usize = match & param[ suffix_start..] {
94+ let multiplier: IgnInit = match & param[ suffix_start..] {
8795 "kB" => 1_000 ,
8896 "K" => 1_024 ,
8997 "MB" => 1_000_000 ,
@@ -106,10 +114,11 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
106114 #[ cfg( not( target_pointer_width = "64" ) ) ]
107115 usize:: MAX
108116 }
109- "ZB" => usize:: MAX , // 1_000_000_000_000_000_000_000,
110- "Z" => usize:: MAX , // 1_180_591_620_717_411_303_424,
111- "YB" => usize:: MAX , // 1_000_000_000_000_000_000_000_000,
112- "Y" => usize:: MAX , // 1_208_925_819_614_629_174_706_176,
117+ // TODO: clamping to IgnInit::MAX does not mimic GNU cmp behavior; it should be an error.
118+ "ZB" => IgnInit :: MAX , // 1_000_000_000_000_000_000_000,
119+ "Z" => IgnInit :: MAX , // 1_180_591_620_717_411_303_424,
120+ "YB" => IgnInit :: MAX , // 1_000_000_000_000_000_000_000_000,
121+ "Y" => IgnInit :: MAX , // 1_208_925_819_614_629_174_706_176,
113122 _ => {
114123 return Err ( format ! (
115124 "{executable_str}: invalid --ignore-initial value '{skip_desc}'"
@@ -119,7 +128,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
119128
120129 num = match num. overflowing_mul ( multiplier) {
121130 ( n, false ) => n,
122- _ => usize :: MAX ,
131+ _ => IgnInit :: MAX ,
123132 }
124133 }
125134
@@ -173,9 +182,10 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
173182 let ( _, arg) = param_str. split_once ( '=' ) . unwrap ( ) ;
174183 arg. to_string ( )
175184 } ;
176- let max_bytes = match max_bytes. parse :: < usize > ( ) {
185+ let max_bytes = match max_bytes. parse :: < Bytes > ( ) {
177186 Ok ( num) => num,
178- Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => usize:: MAX ,
187+ // TODO: clamping to Bytes::MAX is dangerous; this should become an error, as in GNU cmp.
188+ Err ( e) if * e. kind ( ) == std:: num:: IntErrorKind :: PosOverflow => Bytes :: MAX ,
179189 Err ( _) => {
180190 return Err ( format ! (
181191 "{executable_str}: invalid --bytes value '{max_bytes}'"
@@ -233,7 +243,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
233243 }
234244
235245 // Do as GNU cmp, and completely disable printing if we are
236- // outputing to /dev/null.
246+ // outputting to /dev/null.
237247 #[ cfg( not( target_os = "windows" ) ) ]
238248 if is_stdout_dev_null ( ) {
239249 params. quiet = true ;
@@ -285,7 +295,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
285295
286296fn prepare_reader (
287297 path : & OsString ,
288- skip : & Option < usize > ,
298+ skip : & Option < u64 > ,
289299 params : & Params ,
290300) -> Result < Box < dyn BufRead > , String > {
291301 let mut reader: Box < dyn BufRead > = if path == "-" {
@@ -326,7 +336,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
326336 let mut from = prepare_reader ( & params. from , & params. skip_a , params) ?;
327337 let mut to = prepare_reader ( & params. to , & params. skip_b , params) ?;
328338
329- let mut offset_width = params. max_bytes . unwrap_or ( usize :: MAX ) ;
339+ let mut offset_width = params. max_bytes . unwrap_or ( Bytes :: MAX ) ;
330340
331341 if let ( Ok ( a_meta) , Ok ( b_meta) ) = ( fs:: metadata ( & params. from ) , fs:: metadata ( & params. to ) ) {
332342 #[ cfg( not( target_os = "windows" ) ) ]
@@ -341,7 +351,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
341351 return Ok ( Cmp :: Different ) ;
342352 }
343353
344- let smaller = cmp:: min ( a_size, b_size) as usize ;
354+ let smaller = cmp:: min ( a_size, b_size) as Bytes ;
345355 offset_width = cmp:: min ( smaller, offset_width) ;
346356 }
347357
@@ -350,8 +360,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
350360 // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
351361 let mut output = Vec :: < u8 > :: with_capacity ( offset_width + 3 * 2 + 4 * 2 + 4 ) ;
352362
353- let mut at_byte = 1 ;
354- let mut at_line = 1 ;
363+ let mut at_byte: Bytes = 1 ;
364+ let mut at_line: u64 = 1 ;
355365 let mut start_of_line = true ;
356366 let mut stdout = BufWriter :: new ( io:: stdout ( ) . lock ( ) ) ;
357367 let mut compare = Cmp :: Equal ;
@@ -401,8 +411,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
401411 if from_buf[ ..consumed] == to_buf[ ..consumed] {
402412 let last = from_buf[ ..consumed] . last ( ) . unwrap ( ) ;
403413
404- at_byte += consumed;
405- at_line += from_buf[ ..consumed] . iter ( ) . filter ( |& c| * c == b'\n' ) . count ( ) ;
414+ at_byte += consumed as Bytes ;
415+ at_line += ( from_buf[ ..consumed] . iter ( ) . filter ( |& c| * c == b'\n' ) . count ( ) ) as u64 ;
406416
407417 start_of_line = * last == b'\n' ;
408418
@@ -590,7 +600,7 @@ fn format_visible_byte(byte: u8) -> String {
590600fn format_verbose_difference (
591601 from_byte : u8 ,
592602 to_byte : u8 ,
593- at_byte : usize ,
603+ at_byte : Bytes ,
594604 offset_width : usize ,
595605 output : & mut Vec < u8 > ,
596606 params : & Params ,
@@ -655,7 +665,7 @@ fn format_verbose_difference(
655665}
656666
657667#[ inline]
658- fn report_eof ( at_byte : usize , at_line : usize , start_of_line : bool , eof_on : & str , params : & Params ) {
668+ fn report_eof ( at_byte : Bytes , at_line : u64 , start_of_line : bool , eof_on : & str , params : & Params ) {
659669 if params. quiet {
660670 return ;
661671 }
@@ -707,7 +717,7 @@ fn is_posix_locale() -> bool {
707717}
708718
709719#[ inline]
710- fn report_difference ( from_byte : u8 , to_byte : u8 , at_byte : usize , at_line : usize , params : & Params ) {
720+ fn report_difference ( from_byte : u8 , to_byte : u8 , at_byte : Bytes , at_line : u64 , params : & Params ) {
711721 if params. quiet {
712722 return ;
713723 }
@@ -804,7 +814,7 @@ mod tests {
804814 from: os( "foo" ) ,
805815 to: os( "bar" ) ,
806816 skip_a: Some ( 1 ) ,
807- skip_b: Some ( usize :: MAX ) ,
817+ skip_b: Some ( IgnInit :: MAX ) ,
808818 ..Default :: default ( )
809819 } ) ,
810820 parse_params(
@@ -982,7 +992,7 @@ mod tests {
982992 executable: os( "cmp" ) ,
983993 from: os( "foo" ) ,
984994 to: os( "bar" ) ,
985- max_bytes: Some ( usize :: MAX ) ,
995+ max_bytes: Some ( Bytes :: MAX ) ,
986996 ..Default :: default ( )
987997 } ) ,
988998 parse_params(
@@ -999,6 +1009,7 @@ mod tests {
9991009 ) ;
10001010
10011011 // Failure case
1012+ // TODO: GNU cmp accepts this value, but --bytes does not have a unit parser here yet.
10021013 assert_eq ! (
10031014 Err ( "cmp: invalid --bytes value '1K'" . to_string( ) ) ,
10041015 parse_params(
@@ -1044,8 +1055,8 @@ mod tests {
10441055 executable: os( "cmp" ) ,
10451056 from: os( "foo" ) ,
10461057 to: os( "bar" ) ,
1047- skip_a: Some ( usize :: MAX ) ,
1048- skip_b: Some ( usize :: MAX ) ,
1058+ skip_a: Some ( IgnInit :: MAX ) ,
1059+ skip_b: Some ( IgnInit :: MAX ) ,
10491060 ..Default :: default ( )
10501061 } ) ,
10511062 parse_params(
@@ -1116,8 +1127,12 @@ mod tests {
11161127 . enumerate ( )
11171128 {
11181129 let values = [
1119- 1_000usize . checked_pow ( ( i + 1 ) as u32 ) . unwrap_or ( usize:: MAX ) ,
1120- 1024usize . checked_pow ( ( i + 1 ) as u32 ) . unwrap_or ( usize:: MAX ) ,
1130+ ( 1_000 as IgnInit )
1131+ . checked_pow ( ( i + 1 ) as u32 )
1132+ . unwrap_or ( IgnInit :: MAX ) ,
1133+ ( 1024 as IgnInit )
1134+ . checked_pow ( ( i + 1 ) as u32 )
1135+ . unwrap_or ( IgnInit :: MAX ) ,
11211136 ] ;
11221137 for ( j, v) in values. iter ( ) . enumerate ( ) {
11231138 assert_eq ! (
0 commit comments