Skip to content

Commit e6df613

Browse files
committed
progress
1 parent d221721 commit e6df613

File tree

4 files changed

+279
-11
lines changed

4 files changed

+279
-11
lines changed

Cargo.lock

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,19 @@ crate-type = ["cdylib"]
1010

1111
[dependencies]
1212
pyo3 = "0.25.0"
13-
numpy = "0.25.0"
13+
numpy = "0.25.0"
14+
wide = "0.7.33"
15+
16+
17+
[profile.release]
18+
opt-level = 3
19+
lto = "fat"
20+
codegen-units = 1
21+
22+
[profile.dev]
23+
opt-level = 3
24+
lto = "fat"
25+
codegen-units = 1
26+
27+
# NOTE: `[build] rustflags` is a Cargo *configuration* key, not a manifest key.
# Cargo ignores it inside Cargo.toml (it only warns "unused manifest key: build"),
# so `-C target-cpu=native` was never applied from here. To enable it, create
# `.cargo/config.toml` next to this file with:
#
#   [build]
#   rustflags = ["-C", "target-cpu=native"]

doc/articles/first_true_1d.py

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
from arraykit import first_true_1d as ak_first_true_1d
1111
import arraykit as ak
1212

13-
from arrayredox import first_true_1d as ar_first_true_1d
13+
from arrayredox import first_true_1d_a as ar_first_true_1d_a
14+
from arrayredox import first_true_1d_b as ar_first_true_1d_b
15+
from arrayredox import first_true_1d_c as ar_first_true_1d_c
16+
from arrayredox import first_true_1d_d as ar_first_true_1d_d
17+
from arrayredox import first_true_1d_e as ar_first_true_1d_e
18+
from arrayredox import first_true_1d_f as ar_first_true_1d_f
1419

1520
import matplotlib.pyplot as plt
1621
import numpy as np
@@ -35,13 +40,55 @@ class AKFirstTrue(ArrayProcessor):
3540
def __call__(self):
3641
_ = ak_first_true_1d(self.array, forward=True)
3742

38-
class ARFirstTrue(ArrayProcessor):
39-
NAME = 'ar.first_true_1d()'
43+
class ARFirstTrueA(ArrayProcessor):
44+
NAME = 'ar.first_true_1d_a()'
4045
SORT = 0
4146

4247
def __call__(self):
4348
# _ = ar_first_true_1d(self.array, forward=True)
44-
_ = ar_first_true_1d(self.array)
49+
_ = ar_first_true_1d_a(self.array)
50+
51+
class ARFirstTrueB(ArrayProcessor):
    """Benchmark case that calls ``arrayredox.first_true_1d_b`` on ``self.array``."""
    NAME = 'ar.first_true_1d_b()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is being timed.
        _ = ar_first_true_1d_b(self.array)
58+
59+
class ARFirstTrueC(ArrayProcessor):
    """Benchmark case that calls ``arrayredox.first_true_1d_c`` on ``self.array``."""
    NAME = 'ar.first_true_1d_c()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is being timed.
        _ = ar_first_true_1d_c(self.array)
66+
67+
class ARFirstTrueD(ArrayProcessor):
    """Benchmark case that calls ``arrayredox.first_true_1d_d`` on ``self.array``."""
    NAME = 'ar.first_true_1d_d()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is being timed.
        _ = ar_first_true_1d_d(self.array)
74+
75+
class ARFirstTrueE(ArrayProcessor):
    """Benchmark case that calls ``arrayredox.first_true_1d_e`` on ``self.array``."""
    NAME = 'ar.first_true_1d_e()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is being timed.
        _ = ar_first_true_1d_e(self.array)
82+
83+
class ARFirstTrueF(ArrayProcessor):
    """Benchmark case that calls ``arrayredox.first_true_1d_f`` on ``self.array``."""
    NAME = 'ar.first_true_1d_f()'
    SORT = 0

    def __call__(self):
        # The result is discarded; only the call itself is being timed.
        _ = ar_first_true_1d_f(self.array)
90+
91+
4592

4693

4794
class PYLoop(ArrayProcessor):
@@ -77,7 +124,7 @@ def __call__(self):
77124
_ = np.argmax(self.array)
78125

79126
#-------------------------------------------------------------------------------
80-
NUMBER = 100
127+
NUMBER = 10000
81128

82129
def seconds_to_display(seconds: float) -> str:
83130
seconds /= NUMBER
@@ -258,9 +305,14 @@ def get_versions() -> str:
258305

259306
CLS_PROCESSOR = (
260307
AKFirstTrue,
261-
ARFirstTrue,
262-
NPNonZero,
308+
# ARFirstTrueA,
309+
# ARFirstTrueB,
310+
# ARFirstTrueC,
311+
# ARFirstTrueD,
312+
ARFirstTrueE,
313+
ARFirstTrueF,
263314
NPArgMax,
315+
# NPNonZero,
264316
# NPNotAnyArgMax,
265317
# PYLoop,
266318
)

src/lib.rs

Lines changed: 178 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,192 @@
11
use pyo3::prelude::*;
2-
use numpy::PyReadonlyArray1;
2+
use wide::*;
3+
use numpy::{PyReadonlyArray1};
34

45
#[pyfunction]
5-
fn first_true_1d(array: PyReadonlyArray1<bool>) -> isize {
6+
fn first_true_1d_a(array: PyReadonlyArray1<bool>) -> isize {
67
match array.as_slice() {
78
Ok(slice) => slice.iter().position(|&v| v).map(|i| i as isize).unwrap_or(-1),
89
Err(_) => -1, // Should not happen for 1D arrays, but fallback to -1
910
}
1011
}
1112

13+
// Release the GIL, still doing slice iteration
14+
#[pyfunction]
15+
fn first_true_1d_b(py: Python, array: PyReadonlyArray1<bool>) -> isize {
16+
if let Ok(slice) = array.as_slice() {
17+
py.allow_threads(|| {
18+
for (i, &value) in slice.iter().enumerate() {
19+
if value {
20+
return i as isize;
21+
}
22+
}
23+
-1
24+
})
25+
} else {
26+
let array_view = array.as_array();
27+
py.allow_threads(|| {
28+
for (idx, &val) in array_view.iter().enumerate() {
29+
if val {
30+
return idx as isize;
31+
}
32+
}
33+
-1
34+
})
35+
}
36+
}
37+
38+
#[pyfunction]
39+
fn first_true_1d_c(array: PyReadonlyArray1<bool>) -> isize {
40+
if let Ok(slice) = array.as_slice() {
41+
let len = slice.len();
42+
let mut i = 0;
43+
44+
// Process 8 elements at a time
45+
while i + 8 <= len {
46+
if slice[i] { return i as isize; }
47+
if slice[i+1] { return (i+1) as isize; }
48+
if slice[i+2] { return (i+2) as isize; }
49+
if slice[i+3] { return (i+3) as isize; }
50+
if slice[i+4] { return (i+4) as isize; }
51+
if slice[i+5] { return (i+5) as isize; }
52+
if slice[i+6] { return (i+6) as isize; }
53+
if slice[i+7] { return (i+7) as isize; }
54+
i += 8;
55+
}
56+
57+
// Handle remainder
58+
while i < len {
59+
if slice[i] { return i as isize; }
60+
i += 1;
61+
}
62+
-1
63+
} else {
64+
array.as_array().iter().position(|&v| v).map(|i| i as isize).unwrap_or(-1)
65+
}
66+
}
67+
68+
69+
#[pyfunction]
70+
fn first_true_1d_d(array: PyReadonlyArray1<bool>) -> isize {
71+
if let Ok(slice) = array.as_slice() {
72+
let len = slice.len();
73+
let mut i = 0;
74+
75+
unsafe {
76+
// Process 8 elements at a time
77+
while i + 8 <= len {
78+
if *slice.get_unchecked(i) { return i as isize; }
79+
if *slice.get_unchecked(i+1) { return (i+1) as isize; }
80+
if *slice.get_unchecked(i+2) { return (i+2) as isize; }
81+
if *slice.get_unchecked(i+3) { return (i+3) as isize; }
82+
if *slice.get_unchecked(i+4) { return (i+4) as isize; }
83+
if *slice.get_unchecked(i+5) { return (i+5) as isize; }
84+
if *slice.get_unchecked(i+6) { return (i+6) as isize; }
85+
if *slice.get_unchecked(i+7) { return (i+7) as isize; }
86+
i += 8;
87+
}
88+
89+
// Handle remainder
90+
while i < len {
91+
if *slice.get_unchecked(i) { return i as isize; }
92+
i += 1;
93+
}
94+
}
95+
-1
96+
} else {
97+
array.as_array().iter().position(|&v| v).map(|i| i as isize).unwrap_or(-1)
98+
}
99+
}
100+
101+
102+
#[pyfunction]
103+
fn first_true_1d_e(array: PyReadonlyArray1<bool>) -> isize {
104+
if let Ok(slice) = array.as_slice() {
105+
let len = slice.len();
106+
let ptr = slice.as_ptr() as *const u8;
107+
108+
unsafe {
109+
// Process 8 bytes at a time as u64
110+
let mut i = 0;
111+
while i + 8 <= len {
112+
// Check 8 bytes at once
113+
let chunk = *(ptr.add(i) as *const u64);
114+
if chunk != 0 {
115+
// Found a true value in this chunk, check each byte
116+
for j in 0..8 {
117+
if i + j < len && *ptr.add(i + j) != 0 {
118+
return (i + j) as isize;
119+
}
120+
}
121+
}
122+
i += 8;
123+
}
124+
125+
// Handle remainder
126+
while i < len {
127+
if *ptr.add(i) != 0 {
128+
return i as isize;
129+
}
130+
i += 1;
131+
}
132+
}
133+
-1
134+
} else {
135+
array.as_array().iter().position(|&v| v).map(|i| i as isize).unwrap_or(-1)
136+
}
137+
}
138+
139+
140+
#[pyfunction]
141+
fn first_true_1d_f(py: Python, array: PyReadonlyArray1<bool>) -> isize {
142+
if let Ok(slice) = array.as_slice() {
143+
py.allow_threads(|| {
144+
let len = slice.len();
145+
let ptr = slice.as_ptr() as *const u8;
146+
let mut i = 0;
147+
148+
let ones = u8x32::splat(1);
149+
unsafe {
150+
// Process 32 bytes at a time with SIMD
151+
while i + 32 <= len {
152+
// Cast pointer to array reference
153+
let bytes = &*(ptr.add(i) as *const [u8; 32]);
154+
155+
// Convert to SIMD vector
156+
let chunk = u8x32::from(*bytes);
157+
let equal_one = chunk.cmp_eq(ones);
158+
if equal_one.any() {
159+
break;
160+
}
161+
162+
i += 32;
163+
}
164+
// // Handle final remainder
165+
while i < len.min(i + 32) {
166+
if *ptr.add(i) != 0 {
167+
return i as isize;
168+
}
169+
i += 1;
170+
}
171+
-1
172+
}
173+
})
174+
} else {
175+
let array_view = array.as_array();
176+
py.allow_threads(|| {
177+
array_view.iter().position(|&v| v).map(|i| i as isize).unwrap_or(-1)
178+
})
179+
}
180+
}
181+
12182

13183
#[pymodule]
14184
fn arrayredox(m: &Bound<'_, PyModule>) -> PyResult<()> {
15-
m.add_function(wrap_pyfunction!(first_true_1d, m)?)?;
185+
m.add_function(wrap_pyfunction!(first_true_1d_a, m)?)?;
186+
m.add_function(wrap_pyfunction!(first_true_1d_b, m)?)?;
187+
m.add_function(wrap_pyfunction!(first_true_1d_c, m)?)?;
188+
m.add_function(wrap_pyfunction!(first_true_1d_d, m)?)?;
189+
m.add_function(wrap_pyfunction!(first_true_1d_e, m)?)?;
190+
m.add_function(wrap_pyfunction!(first_true_1d_f, m)?)?;
16191
Ok(())
17192
}

0 commit comments

Comments
 (0)