@@ -7,16 +7,14 @@ use wide::*;
7
7
// use std::simd::Simd;
8
8
// use std::simd::cmp::SimdPartialEq;
9
9
10
- use numpy:: ndarray:: { Array2 , ArrayView2 } ;
11
- use numpy:: IntoPyArray ;
10
+ use numpy:: ndarray:: Array2 ;
12
11
use numpy:: PyArray1 ;
13
12
use numpy:: PyArrayMethods ;
13
+ use numpy:: PyReadonlyArray2 ;
14
14
use numpy:: PyUntypedArrayMethods ;
15
- use numpy:: ToPyArray ;
16
- use numpy:: { PyArray2 , PyReadonlyArray2 } ;
17
15
18
- use rayon:: prelude:: * ;
19
- use rayon:: ThreadPoolBuilder ;
16
+ // use rayon::prelude::*;
17
+ // use rayon::ThreadPoolBuilder;
20
18
use std:: sync:: Arc ;
21
19
22
20
#[ pyfunction]
@@ -425,7 +423,7 @@ pub fn prepare_array_for_axis<'py>(
425
423
if is_c && axis == 1 {
426
424
if let Ok ( slice) = array. as_slice ( ) {
427
425
return Ok ( PreparedBool2D {
428
- data : unsafe { std:: mem:: transmute ( slice) } , // &[bool] → &[u8]
426
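+ data : unsafe { std:: mem:: transmute :: < & [ bool ] , & [ u8 ] > ( slice) } , // SAFETY: reinterprets &[bool] as &[u8]; `bool` is one byte whose only valid values are 0 and 1, so every element is a valid `u8`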
429
427
nrows,
430
428
ncols,
431
429
_keepalive : None ,
@@ -438,7 +436,7 @@ pub fn prepare_array_for_axis<'py>(
438
436
let transposed = array_view. reversed_axes ( ) ;
439
437
if let Some ( slice) = transposed. as_standard_layout ( ) . as_slice_memory_order ( ) {
440
438
return Ok ( PreparedBool2D {
441
- data : unsafe { std:: mem:: transmute ( slice) } ,
439
+ data : unsafe { std:: mem:: transmute :: < & [ bool ] , & [ u8 ] > ( slice) } ,
442
440
nrows,
443
441
ncols,
444
442
_keepalive : None ,
@@ -458,7 +456,7 @@ pub fn prepare_array_for_axis<'py>(
458
456
. expect ( "newly allocated Array2 must be contiguous" ) ;
459
457
460
458
Ok ( PreparedBool2D {
461
- data : unsafe { std:: mem:: transmute ( slice) } ,
459
+ data : unsafe { std:: mem:: transmute :: < & [ bool ] , & [ u8 ] > ( slice) } ,
462
460
nrows,
463
461
ncols,
464
462
_keepalive : Some ( Arc :: new ( array_owned) ) ,
@@ -491,6 +489,7 @@ pub fn first_true_2d<'py>(
491
489
let mut i;
492
490
493
491
if forward {
492
+ #[ allow( clippy:: needless_range_loop) ]
494
493
for row in 0 ..rows {
495
494
let ptr = unsafe { base_ptr. add ( row * row_len) } ;
496
495
i = 0 ;
@@ -514,6 +513,7 @@ pub fn first_true_2d<'py>(
514
513
}
515
514
} else {
516
515
// Backward search
516
+ #[ allow( clippy:: needless_range_loop) ]
517
517
for row in 0 ..rows {
518
518
let ptr = unsafe { base_ptr. add ( row * row_len) } ;
519
519
@@ -553,111 +553,111 @@ pub fn first_true_2d<'py>(
553
553
Ok ( pyarray)
554
554
}
555
555
556
- #[ pyfunction]
557
- #[ pyo3( signature = ( array, * , forward=true , axis) ) ]
558
- pub fn first_true_2d_b < ' py > (
559
- py : Python < ' py > ,
560
- array : PyReadonlyArray2 < ' py , bool > ,
561
- forward : bool ,
562
- axis : isize ,
563
- ) -> PyResult < Bound < ' py , PyArray1 < isize > > > {
564
- let prepared = prepare_array_for_axis ( array, axis) ?;
565
- let data = prepared. data ;
566
- let rows = prepared. nrows ;
567
- let row_len = prepared. ncols ;
568
-
569
- let mut result = vec ! [ -1isize ; rows] ;
570
-
571
- // Dynamically select thread count
572
- let max_threads = if rows < 100 {
573
- 1
574
- } else if rows < 1000 {
575
- 1
576
- } else if rows < 10000 {
577
- 1
578
- } else {
579
- 16
580
- } ;
556
+ // #[pyfunction]
557
+ // #[pyo3(signature = (array, *, forward=true, axis))]
558
+ // pub fn first_true_2d_b<'py>(
559
+ // py: Python<'py>,
560
+ // array: PyReadonlyArray2<'py, bool>,
561
+ // forward: bool,
562
+ // axis: isize,
563
+ // ) -> PyResult<Bound<'py, PyArray1<isize>>> {
564
+ // let prepared = prepare_array_for_axis(array, axis)?;
565
+ // let data = prepared.data;
566
+ // let rows = prepared.nrows;
567
+ // let row_len = prepared.ncols;
568
+
569
+ // let mut result = vec![-1isize; rows];
570
+
571
+ // // Dynamically select thread count
572
+ // let max_threads = if rows < 100 {
573
+ // 1
574
+ // } else if rows < 1000 {
575
+ // 1
576
+ // } else if rows < 10000 {
577
+ // 1
578
+ // } else {
579
+ // 16
580
+ // };
581
581
582
- py. allow_threads ( || {
583
- let base_ptr = data. as_ptr ( ) as usize ;
584
- const LANES : usize = 32 ;
585
- let ones = u8x32:: splat ( 1 ) ;
582
+ // py.allow_threads(|| {
583
+ // let base_ptr = data.as_ptr() as usize;
584
+ // const LANES: usize = 32;
585
+ // let ones = u8x32::splat(1);
586
586
587
- let process_row = |row : usize | -> isize {
588
- let ptr = ( base_ptr + row * row_len) as * const u8 ;
589
- let mut found = -1isize ;
587
+ // let process_row = |row: usize| -> isize {
588
+ // let ptr = (base_ptr + row * row_len) as *const u8;
589
+ // let mut found = -1isize;
590
590
591
- unsafe {
592
- if forward {
593
- let mut i = 0 ;
594
- while i + LANES <= row_len {
595
- let chunk = & * ( ptr. add ( i) as * const [ u8 ; LANES ] ) ;
596
- let vec = u8x32:: from ( * chunk) ;
597
- if vec. cmp_eq ( ones) . any ( ) {
598
- break ;
599
- }
600
- i += LANES ;
601
- }
602
- while i < row_len {
603
- if * ptr. add ( i) != 0 {
604
- found = i as isize ;
605
- break ;
606
- }
607
- i += 1 ;
608
- }
609
- } else {
610
- let mut i = row_len;
611
- while i >= LANES {
612
- i -= LANES ;
613
- let chunk = & * ( ptr. add ( i) as * const [ u8 ; LANES ] ) ;
614
- let vec = u8x32:: from ( * chunk) ;
615
- if vec. cmp_eq ( ones) . any ( ) {
616
- for j in ( i..i + LANES ) . rev ( ) {
617
- if * ptr. add ( j) != 0 {
618
- found = j as isize ;
619
- break ;
620
- }
621
- }
622
- break ;
623
- }
624
- }
625
- if i > 0 && i < LANES {
626
- for j in ( 0 ..i) . rev ( ) {
627
- if * ptr. add ( j) != 0 {
628
- found = j as isize ;
629
- break ;
630
- }
631
- }
632
- }
633
- }
634
- }
591
+ // unsafe {
592
+ // if forward {
593
+ // let mut i = 0;
594
+ // while i + LANES <= row_len {
595
+ // let chunk = &*(ptr.add(i) as *const [u8; LANES]);
596
+ // let vec = u8x32::from(*chunk);
597
+ // if vec.cmp_eq(ones).any() {
598
+ // break;
599
+ // }
600
+ // i += LANES;
601
+ // }
602
+ // while i < row_len {
603
+ // if *ptr.add(i) != 0 {
604
+ // found = i as isize;
605
+ // break;
606
+ // }
607
+ // i += 1;
608
+ // }
609
+ // } else {
610
+ // let mut i = row_len;
611
+ // while i >= LANES {
612
+ // i -= LANES;
613
+ // let chunk = &*(ptr.add(i) as *const [u8; LANES]);
614
+ // let vec = u8x32::from(*chunk);
615
+ // if vec.cmp_eq(ones).any() {
616
+ // for j in (i..i + LANES).rev() {
617
+ // if *ptr.add(j) != 0 {
618
+ // found = j as isize;
619
+ // break;
620
+ // }
621
+ // }
622
+ // break;
623
+ // }
624
+ // }
625
+ // if i > 0 && i < LANES {
626
+ // for j in (0..i).rev() {
627
+ // if *ptr.add(j) != 0 {
628
+ // found = j as isize;
629
+ // break;
630
+ // }
631
+ // }
632
+ // }
633
+ // }
634
+ // }
635
635
636
- found
637
- } ;
636
+ // found
637
+ // };
638
638
639
- if max_threads == 1 {
640
- // Single-threaded path
641
- for row in 0 ..rows {
642
- result[ row] = process_row ( row) ;
643
- }
644
- } else {
645
- // Multi-threaded path with Rayon
646
- let pool = rayon:: ThreadPoolBuilder :: new ( )
647
- . num_threads ( max_threads)
648
- . build ( )
649
- . unwrap ( ) ;
650
-
651
- pool. install ( || {
652
- result. par_iter_mut ( ) . enumerate ( ) . for_each ( |( row, out) | {
653
- * out = process_row ( row) ;
654
- } ) ;
655
- } ) ;
656
- }
657
- } ) ;
639
+ // if max_threads == 1 {
640
+ // // Single-threaded path
641
+ // for row in 0..rows {
642
+ // result[row] = process_row(row);
643
+ // }
644
+ // } else {
645
+ // // Multi-threaded path with Rayon
646
+ // let pool = rayon::ThreadPoolBuilder::new()
647
+ // .num_threads(max_threads)
648
+ // .build()
649
+ // .unwrap();
650
+
651
+ // pool.install(|| {
652
+ // result.par_iter_mut().enumerate().for_each(|(row, out)| {
653
+ // *out = process_row(row);
654
+ // });
655
+ // });
656
+ // }
657
+ // });
658
658
659
- Ok ( PyArray1 :: from_vec ( py, result) )
660
- }
659
+ // Ok(PyArray1::from_vec(py, result))
660
+ // }
661
661
662
662
//------------------------------------------------------------------------------
663
663
0 commit comments