@@ -490,22 +490,57 @@ mod tests {
490
490
Ok ( ( ) )
491
491
}
492
492
493
- #[ test]
494
- fn create_hashes_binary ( ) -> Result < ( ) > {
495
- let byte_array = Arc :: new ( BinaryArray :: from_vec ( vec ! [
496
- & [ 4 , 3 , 2 ] ,
497
- & [ 4 , 3 , 2 ] ,
498
- & [ 1 , 2 , 3 ] ,
499
- ] ) ) ;
493
/// Generates a `#[test]` function named `$NAME` that hashes one fixed set of
/// optional byte strings twice: once collected into `$ARRAY` and once collected
/// into a reference `BinaryArray`.
///
/// The generated test checks that:
/// * null entries hash to `0` and non-null entries hash to a non-zero value,
/// * `$ARRAY` and `BinaryArray` produce identical hashes for the same values,
/// * equal inputs produce equal hashes and distinct inputs produce distinct
///   hashes.
///
/// The test is compiled out when the `force_hash_collisions` feature is
/// enabled (presumably because the non-zero / distinct-hash assertions cannot
/// hold in that mode -- confirm against the hashing implementation).
macro_rules! create_hash_binary {
    ($NAME:ident, $ARRAY:ty) => {
        #[cfg(not(feature = "force_hash_collisions"))]
        #[test]
        fn $NAME() {
            // Mix of short and long values, one null, and two duplicated
            // pairs: (0, 5) and (4, 6).
            let binary = [
                Some(b"short".to_byte_slice()),
                None,
                Some(b"long but different 12 bytes string"),
                Some(b"short2"),
                Some(b"Longer than 12 bytes string"),
                Some(b"short"),
                Some(b"Longer than 12 bytes string"),
            ];

            let binary_array = Arc::new(binary.iter().cloned().collect::<$ARRAY>());
            let ref_array = Arc::new(binary.iter().cloned().collect::<BinaryArray>());

            let random_state = RandomState::with_seeds(0, 0, 0, 0);

            let mut binary_hashes = vec![0; binary.len()];
            create_hashes(&[binary_array], &random_state, &mut binary_hashes).unwrap();

            let mut ref_hashes = vec![0; binary.len()];
            create_hashes(&[ref_array], &random_state, &mut ref_hashes).unwrap();

            // Null values result in a zero hash, non-null values in a non-zero one
            for (val, hash) in binary.iter().zip(binary_hashes.iter()) {
                match val {
                    Some(_) => assert_ne!(*hash, 0),
                    None => assert_eq!(*hash, 0),
                }
            }

            // same logical values should hash to the same hash value
            assert_eq!(binary_hashes, ref_hashes);

            // Same values should map to same hash values. The input
            // comparison guards the fixture; the hash comparison is the
            // actual property under test.
            assert_eq!(binary[0], binary[5]);
            assert_eq!(binary_hashes[0], binary_hashes[5]);
            assert_eq!(binary[4], binary[6]);
            assert_eq!(binary_hashes[4], binary_hashes[6]);

            // different binary should map to different hash values
            assert_ne!(binary[0], binary[2]);
            assert_ne!(binary_hashes[0], binary_hashes[2]);
        }
    };
}

create_hash_binary!(binary_array, BinaryArray);
create_hash_binary!(binary_view_array, BinaryViewArray);
543
+
509
544
#[ test]
510
545
fn create_hashes_fixed_size_binary ( ) -> Result < ( ) > {
511
546
let input_arg = vec ! [ vec![ 1 , 2 ] , vec![ 5 , 6 ] , vec![ 5 , 6 ] ] ;
@@ -521,6 +556,64 @@ mod tests {
521
556
Ok ( ( ) )
522
557
}
523
558
559
/// Generates a `#[test]` function named `$NAME` that hashes one fixed set of
/// optional strings twice: once collected into `$ARRAY` and once collected
/// into a `DictionaryArray<Int8Type>`.
///
/// The generated test checks that:
/// * null entries hash to `0` and non-null entries hash to a non-zero value,
/// * `$ARRAY` and the dictionary array produce identical hashes for the same
///   logical values,
/// * equal inputs produce equal hashes and distinct inputs produce distinct
///   hashes.
///
/// The test is compiled out when the `force_hash_collisions` feature is
/// enabled (presumably because the non-zero / distinct-hash assertions cannot
/// hold in that mode -- confirm against the hashing implementation).
macro_rules! create_hash_string {
    ($NAME:ident, $ARRAY:ty) => {
        #[cfg(not(feature = "force_hash_collisions"))]
        #[test]
        fn $NAME() {
            // Mix of short and long values, one null, and two duplicated
            // pairs: (0, 5) and (4, 6).
            let strings = [
                Some("short"),
                None,
                Some("long but different 12 bytes string"),
                Some("short2"),
                Some("Longer than 12 bytes string"),
                Some("short"),
                Some("Longer than 12 bytes string"),
            ];

            let string_array = Arc::new(strings.iter().cloned().collect::<$ARRAY>());
            let dict_array = Arc::new(
                strings
                    .iter()
                    .cloned()
                    .collect::<DictionaryArray<Int8Type>>(),
            );

            let random_state = RandomState::with_seeds(0, 0, 0, 0);

            let mut string_hashes = vec![0; strings.len()];
            create_hashes(&[string_array], &random_state, &mut string_hashes).unwrap();

            let mut dict_hashes = vec![0; strings.len()];
            create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap();

            // Null values result in a zero hash, non-null values in a non-zero one
            for (val, hash) in strings.iter().zip(string_hashes.iter()) {
                match val {
                    Some(_) => assert_ne!(*hash, 0),
                    None => assert_eq!(*hash, 0),
                }
            }

            // same logical values should hash to the same hash value
            assert_eq!(string_hashes, dict_hashes);

            // Same values should map to same hash values. The input
            // comparison guards the fixture; the hash comparison is the
            // actual property under test.
            assert_eq!(strings[0], strings[5]);
            assert_eq!(string_hashes[0], string_hashes[5]);
            assert_eq!(strings[4], strings[6]);
            assert_eq!(string_hashes[4], string_hashes[6]);

            // different strings should map to different hash values
            assert_ne!(strings[0], strings[2]);
            assert_ne!(string_hashes[0], string_hashes[2]);
        }
    };
}

create_hash_string!(string_array, StringArray);
create_hash_string!(large_string_array, LargeStringArray);
// The view-array test must be instantiated with the view type; using
// `StringArray` here would merely duplicate `string_array`.
create_hash_string!(string_view_array, StringViewArray);
create_hash_string!(dict_string_array, DictionaryArray<Int8Type>);
616
+
524
617
#[ test]
525
618
// Tests actual values of hashes, which are different if forcing collisions
526
619
#[ cfg( not( feature = "force_hash_collisions" ) ) ]
0 commit comments