19
19
* See the License for the specific language governing permissions and
20
20
* limitations under the License.
21
21
*/
22
+ use std:: {
23
+ any:: Any ,
24
+ fmt:: Debug ,
25
+ iter:: zip,
26
+ sync:: { Mutex , MutexGuard } ,
27
+ } ;
28
+
22
29
use arrow:: {
23
30
array:: AsArray ,
24
31
datatypes:: {
@@ -31,16 +38,6 @@ use datafusion::{
31
38
prelude:: Expr ,
32
39
scalar:: ScalarValue ,
33
40
} ;
34
- use log:: info;
35
- use num_format:: { Locale , ToFormattedString } ;
36
-
37
- use std:: {
38
- any:: Any ,
39
- fmt:: Debug ,
40
- iter:: zip,
41
- sync:: { Mutex , MutexGuard } ,
42
- time:: { Duration , Instant } ,
43
- } ;
44
41
45
42
use super :: sketch:: { update_sketch, DataSketchVariant , K } ;
46
43
@@ -50,12 +47,10 @@ use super::sketch::{update_sketch, DataSketchVariant, K};
50
47
/// so we can inject them into a sketch for later retrieval. The query should look something like:
51
48
/// `SELECT sketch(row_key_col1, row_key_col2, ...), row_key_col2, value_col1, value_col2, ... FROM blah...`
52
49
/// so the sketch function can see each row key column, but only returns the first.
53
-
54
50
pub ( crate ) struct SketchUDF {
55
51
signature : Signature ,
56
52
invoke_count : Mutex < usize > ,
57
53
sketch : Mutex < Vec < DataSketchVariant > > ,
58
- bench_stats : Mutex < BenchStats > ,
59
54
}
60
55
61
56
impl Debug for SketchUDF {
@@ -69,21 +64,13 @@ impl Debug for SketchUDF {
69
64
}
70
65
71
66
impl SketchUDF {
72
- const BENCH_DURATION : Duration = Duration :: from_secs ( 10 ) ;
73
67
/// Create a new sketch function based on the schema of the row key fields.
74
68
///
75
69
pub fn new ( schema : & DFSchema , row_keys : & [ String ] ) -> Self {
76
70
Self {
77
71
signature : Signature :: exact ( get_row_key_types ( schema, row_keys) , Volatility :: Immutable ) ,
78
72
invoke_count : Mutex :: default ( ) ,
79
73
sketch : Mutex :: new ( make_sketches_for_schema ( schema, row_keys) ) ,
80
- bench_stats : Mutex :: new ( BenchStats {
81
- start_time : Instant :: now ( ) ,
82
- last_instant_measure : Instant :: now ( ) ,
83
- last_instant_row_speed : 0 ,
84
- rows_since_instant : 0 ,
85
- rows_since_start : 0 ,
86
- } ) ,
87
74
}
88
75
}
89
76
@@ -94,35 +81,6 @@ impl SketchUDF {
94
81
pub fn get_invoke_count ( & self ) -> usize {
95
82
* self . invoke_count . lock ( ) . unwrap ( )
96
83
}
97
-
98
- fn bench_report ( & self , rows : usize ) {
99
- let mut stats_lock = self . bench_stats . lock ( ) . unwrap ( ) ;
100
- stats_lock. rows_since_start += rows;
101
- stats_lock. rows_since_instant += rows;
102
- let now = Instant :: now ( ) ;
103
-
104
- if now. duration_since ( stats_lock. last_instant_measure ) > Self :: BENCH_DURATION {
105
- stats_lock. last_instant_row_speed = stats_lock. rows_since_instant as u64
106
- / std:: cmp:: max (
107
- now. duration_since ( stats_lock. last_instant_measure )
108
- . as_secs ( ) ,
109
- 1 ,
110
- ) ;
111
- stats_lock. last_instant_measure = now;
112
- stats_lock. rows_since_instant = 0 ;
113
- let rows_speed_from_start = stats_lock. rows_since_start as u64
114
- / std:: cmp:: max ( now. duration_since ( stats_lock. start_time ) . as_secs ( ) , 1 ) ;
115
-
116
- info ! (
117
- "Bench speeds: {} rows/sec. ({} second rolling avg.) {} rows/sec. lifetime avg." ,
118
- stats_lock
119
- . last_instant_row_speed
120
- . to_formatted_string( & Locale :: en) ,
121
- Self :: BENCH_DURATION . as_secs( ) ,
122
- rows_speed_from_start. to_formatted_string( & Locale :: en)
123
- ) ;
124
- }
125
- }
126
84
}
127
85
128
86
/// Create a [`Vec`] of data types for this schema from the row keys.
@@ -191,7 +149,6 @@ impl ScalarUDFImpl for SketchUDF {
191
149
for ( sketch, col) in zip ( sk_lock. iter_mut ( ) , & args. args ) {
192
150
match col {
193
151
ColumnarValue :: Array ( array) => {
194
- self . bench_report ( array. len ( ) ) ;
195
152
// dynamic dispatch. Match the datatype to the type of sketch to update.
196
153
match array. data_type ( ) {
197
154
DataType :: Int32 => update_sketch ( sketch, & array. as_primitive :: < Int32Type > ( ) ) ,
@@ -229,23 +186,19 @@ impl ScalarUDFImpl for SketchUDF {
229
186
| ScalarValue :: LargeUtf8 ( Some ( value) )
230
187
| ScalarValue :: Utf8View ( Some ( value) ) ,
231
188
) => {
232
- self . bench_report ( 1 ) ;
233
189
sketch. update ( value) ;
234
190
}
235
191
ColumnarValue :: Scalar (
236
192
ScalarValue :: Binary ( Some ( value) )
237
193
| ScalarValue :: LargeBinary ( Some ( value) )
238
194
| ScalarValue :: BinaryView ( Some ( value) ) ,
239
195
) => {
240
- self . bench_report ( 1 ) ;
241
196
sketch. update ( value) ;
242
197
}
243
198
ColumnarValue :: Scalar ( ScalarValue :: Int32 ( Some ( value) ) ) => {
244
- self . bench_report ( 1 ) ;
245
199
sketch. update ( value) ;
246
200
}
247
201
ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( value) ) ) => {
248
- self . bench_report ( 1 ) ;
249
202
sketch. update ( value) ;
250
203
}
251
204
x @ ColumnarValue :: Scalar ( _) => {
0 commit comments