Skip to content

Add compilation stats #1339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 44 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
8d91878
Get the max size for each type
DiegoCivi Jul 30, 2025
47026ac
Add new stats to Statistics
DiegoCivi Jul 31, 2025
b546c91
Add temp func to get types names
DiegoCivi Jul 31, 2025
314a4fa
Fix how to get the max size
DiegoCivi Jul 31, 2025
f9e223f
Refactor of map of sizes
DiegoCivi Jul 31, 2025
9f4ceaa
Add stats for returns and params
DiegoCivi Jul 31, 2025
89f60b7
Add stats about sizes in sierra
DiegoCivi Jul 31, 2025
319e533
Add stat about llvmir params
DiegoCivi Jul 31, 2025
a2859e1
Add circuit gates count stats
DiegoCivi Aug 1, 2025
f6402c7
Add circuits count stats
DiegoCivi Aug 1, 2025
f8c4b7e
Fix params stats
DiegoCivi Aug 4, 2025
2e816c1
Add gates count per circuit
DiegoCivi Aug 4, 2025
e07e01e
Refactor
DiegoCivi Aug 4, 2025
7d20787
Minor changes
DiegoCivi Aug 4, 2025
6ccb11a
Create func to reduce code in new_into()
DiegoCivi Aug 4, 2025
32a047d
Refactor of gates count stat
DiegoCivi Aug 5, 2025
dd29aa6
Minor changes
DiegoCivi Aug 5, 2025
0c8e562
Remove avg params stat
DiegoCivi Aug 5, 2025
9381d9f
Refactor of params size stat
DiegoCivi Aug 5, 2025
cd9c418
Fix error on return types size stat
DiegoCivi Aug 5, 2025
be7f0ac
Clippy
DiegoCivi Aug 5, 2025
0d14a00
Merge branch 'main' into add-stats
DiegoCivi Aug 5, 2025
83afe53
Refactor of sierra funcs stats
DiegoCivi Aug 6, 2025
aa1c762
Refactor of sierra declared types stats
DiegoCivi Aug 6, 2025
d298e8c
Skip builtins type
DiegoCivi Aug 6, 2025
1cb390a
Merge branch 'add-stats' of github.com:lambdaclass/cairo_native into …
DiegoCivi Aug 6, 2025
647e2d9
Refactor of gates stats and remove circuit count
DiegoCivi Aug 7, 2025
25fea5a
Remove llvm ir params stat
DiegoCivi Aug 7, 2025
65eec68
Change stats method to be a utility function
DiegoCivi Aug 7, 2025
ddb303b
Use gate evaluation algorithm
DiegoCivi Aug 7, 2025
7e959ce
Remove concrete_type field from SierraDeclaredTypeStats
DiegoCivi Aug 8, 2025
6c7a10b
Handle name of recursive types
DiegoCivi Aug 8, 2025
4cc0901
Revert "Remove concrete_type field from SierraDeclaredTypeStats"
DiegoCivi Aug 8, 2025
a7710d3
Revert "Handle name of recursive types"
DiegoCivi Aug 8, 2025
2cc0d61
Handle recursion on correct function
DiegoCivi Aug 10, 2025
24d8fe4
Change fold to map and sum
DiegoCivi Aug 18, 2025
78fe43f
Use native_panic! instead of panic
DiegoCivi Aug 18, 2025
5642941
Remove the stats about quantity in functions
DiegoCivi Aug 18, 2025
c404dd4
Docu for type_to_name
DiegoCivi Aug 18, 2025
5580b49
Refactor of CircuitGatesStats fields names
DiegoCivi Aug 18, 2025
bd5e08f
contract -> program
DiegoCivi Aug 18, 2025
4979bce
Merge branch 'main' into add-stats
DiegoCivi Aug 18, 2025
642552b
Fix errors
DiegoCivi Aug 18, 2025
ceeb6a2
Add times a func is called stat
DiegoCivi Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 54 additions & 21 deletions src/debug.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
use std::{
any::{Any, TypeId},
collections::HashSet,
};

use cairo_lang_sierra::{
extensions::{
array::ArrayConcreteLibfunc,
Expand Down Expand Up @@ -429,62 +434,90 @@ pub fn generic_type_to_name(
registry: &ProgramRegistry<CoreType, CoreLibfunc>,
name: &str,
args: &[ConcreteTypeId],
visited_types: HashSet<TypeId>,
) -> String {
format!(
"{}<{}>",
name,
args.iter()
.map(|field_type| {
registry
let concrete_type = registry
.get_type(field_type)
.expect("failed to find type in registry")
.expect("failed to find type in registry");
type_to_name(registry, concrete_type, visited_types.clone())
})
.map(|field_type| type_to_name(registry, field_type))
.filter(|type_name| !type_name.is_empty())
.join(",")
)
}

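/// Builds a string representation of a `CoreTypeConcrete` name
/// by recursively iterating its structure.
///
/// Since this can lead to infinite recursion, a `HashSet` is used to
/// track visited types and stop the iteration if a type has already
/// been encountered.
// NOTE(review): with `std::any::Any` in scope, `ty.type_id()` appears to
// resolve to `Any::type_id`, which identifies the Rust type
// `CoreTypeConcrete` itself rather than the individual Sierra type. If so,
// every nested type looks "already visited" and is rendered as an empty
// string. Please confirm, and consider tracking `ConcreteTypeId`s instead.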
pub fn type_to_name(
registry: &ProgramRegistry<CoreType, CoreLibfunc>,
ty: &CoreTypeConcrete,
mut visited_types: HashSet<TypeId>,
) -> String {
let type_id = ty.type_id();
if visited_types.contains(&type_id) {
return String::from("");
}
visited_types.insert(type_id);
match ty {
CoreTypeConcrete::Struct(info) => generic_type_to_name(registry, "struct", &info.members),
CoreTypeConcrete::Enum(info) => generic_type_to_name(registry, "enum", &info.variants),
CoreTypeConcrete::Struct(info) => {
generic_type_to_name(registry, "struct", &info.members, visited_types)
}
CoreTypeConcrete::Enum(info) => {
generic_type_to_name(registry, "enum", &info.variants, visited_types)
}
CoreTypeConcrete::BoundedInt(info) => {
format!("bounded_int<{},{}>", info.range.lower, info.range.upper)
}
CoreTypeConcrete::Array(info) => {
generic_type_to_name(registry, "array", &[info.ty.clone()])
generic_type_to_name(registry, "array", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Snapshot(info) => {
generic_type_to_name(registry, "snapshot", &[info.ty.clone()])
generic_type_to_name(registry, "snapshot", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Span(info) => generic_type_to_name(registry, "span", &[info.ty.clone()]),
CoreTypeConcrete::Felt252Dict(info) => {
generic_type_to_name(registry, "felt252_dict", &[info.ty.clone()])
}
CoreTypeConcrete::Felt252DictEntry(info) => {
generic_type_to_name(registry, "felt252_dict_entry", &[info.ty.clone()])
CoreTypeConcrete::Span(info) => {
generic_type_to_name(registry, "span", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::SquashedFelt252Dict(info) => {
generic_type_to_name(registry, "squashed_felt252_dict", &[info.ty.clone()])
CoreTypeConcrete::Felt252Dict(info) => {
generic_type_to_name(registry, "felt252_dict", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Felt252DictEntry(info) => generic_type_to_name(
registry,
"felt252_dict_entry",
&[info.ty.clone()],
visited_types,
),
CoreTypeConcrete::SquashedFelt252Dict(info) => generic_type_to_name(
registry,
"squashed_felt252_dict",
&[info.ty.clone()],
visited_types,
),
CoreTypeConcrete::NonZero(info) => {
generic_type_to_name(registry, "non_zero", &[info.ty.clone()])
generic_type_to_name(registry, "non_zero", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Box(info) => {
generic_type_to_name(registry, "box", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Box(info) => generic_type_to_name(registry, "box", &[info.ty.clone()]),
CoreTypeConcrete::Uninitialized(info) => {
generic_type_to_name(registry, "uninitialized", &[info.ty.clone()])
generic_type_to_name(registry, "uninitialized", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Nullable(info) => {
generic_type_to_name(registry, "nullable", &[info.ty.clone()])
generic_type_to_name(registry, "nullable", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Const(info) => {
generic_type_to_name(registry, "const", &[info.inner_ty.clone()])
generic_type_to_name(registry, "const", &[info.inner_ty.clone()], visited_types)
}
CoreTypeConcrete::IntRange(info) => {
generic_type_to_name(registry, "int_range", &[info.ty.clone()])
generic_type_to_name(registry, "int_range", &[info.ty.clone()], visited_types)
}
CoreTypeConcrete::Starknet(selector) => match selector {
StarknetTypeConcrete::ClassHash(_) => String::from("class_hash"),
Expand Down
83 changes: 77 additions & 6 deletions src/executor/contract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::{
arch::AbiArgument,
clone_option_mut,
context::NativeContext,
debug::libfunc_to_name,
debug::{libfunc_to_name, type_to_name},
error::{panic::ToNativeAssertError, Error, Result},
execution_result::{
BuiltinStats, ContractExecutionResult, ADD_MOD_BUILTIN_SIZE, BITWISE_BUILTIN_SIZE,
Expand All @@ -47,21 +47,22 @@ use crate::{
module::NativeModule,
native_assert, native_panic,
starknet::{handler::StarknetSyscallHandlerCallbacks, StarknetSyscallHandler},
statistics::Statistics,
statistics::{SierraDeclaredTypeStats, SierraFuncStats, Statistics},
types::TypeBuilder,
utils::{
decode_error_message, generate_function_name, get_integer_layout, libc_free, libc_malloc,
BuiltinCosts,
decode_error_message, generate_function_name, get_integer_layout, get_types_total_size,
libc_free, libc_malloc, BuiltinCosts,
},
OptLevel,
};
use bumpalo::Bump;
use cairo_lang_sierra::{
extensions::{
circuit::CircuitTypeConcrete,
core::{CoreLibfunc, CoreType, CoreTypeConcrete},
core::{CoreConcreteLibfunc, CoreLibfunc, CoreType, CoreTypeConcrete},
gas::CostTokenType,
starknet::StarknetTypeConcrete,
ConcreteLibfunc,
},
ids::FunctionId,
program::{GenFunction, GenStatement, Program, StatementIdx},
Expand All @@ -80,7 +81,7 @@ use starknet_types_core::felt::Felt;
use std::{
alloc::Layout,
cmp::Ordering,
collections::BTreeMap,
collections::{BTreeMap, HashSet},
ffi::c_void,
fs::{self, File},
io,
Expand Down Expand Up @@ -240,11 +241,81 @@ impl AotContractExecutor {
)?;

if let Some(&mut ref mut stats) = stats {
for type_declaration in &program.type_declarations {
if let Ok(type_concrete) = registry.get_type(&type_declaration.id) {
let type_id = type_declaration.id.to_string();
let type_size = type_concrete.layout(&registry).unwrap().size();
if !type_concrete.is_builtin() {
// We don't want to add the builtins to the stats
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are builtins skipped?

Copy link
Contributor Author

@DiegoCivi DiegoCivi Aug 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I figured they are always the same size, so I thought it didn't make much sense to have them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not worth treating them any differently. There are many types that are always the same size (e.g. u8). I would keep them; it would also make the code a bit simpler.

stats.sierra_declared_types_stats.insert(
type_id,
SierraDeclaredTypeStats {
concrete_type: type_to_name(
&registry,
type_concrete,
HashSet::new(),
),
size: type_size,
as_param_count: 0,
},
);
}

if let CoreTypeConcrete::Circuit(CircuitTypeConcrete::Circuit(info)) =
type_concrete
{
stats.add_circuit_gates(&info.circuit_info)?;
}
}
}

for statement in &program.statements {
if let GenStatement::Invocation(invocation) = statement {
let libfunc = registry.get_libfunc(&invocation.libfunc_id)?;
let name = libfunc_to_name(libfunc).to_string();
*stats.sierra_libfunc_frequency.entry(name).or_insert(0) += 1;

for param in libfunc.param_signatures() {
let param_ty = param.ty.to_string();
if let Some(type_stats) =
stats.sierra_declared_types_stats.get_mut(&param_ty)
{
type_stats.as_param_count += 1;
}
}
}
}

for func in &program.funcs {
let func_id = func.id.to_string();
// Params
let params_total_size =
get_types_total_size(&func.signature.param_types, &registry);
// Return types
let return_types_total_size =
get_types_total_size(&func.signature.ret_types, &registry);

stats.sierra_func_stats.insert(
func_id,
SierraFuncStats {
params_total_size,
return_types_total_size,
times_used: 0,
},
);
}

for statement in &program.statements {
match statement {
GenStatement::Invocation(gen_invocation) => {
let libfunc = registry.get_libfunc(&gen_invocation.libfunc_id).unwrap();
if let CoreConcreteLibfunc::FunctionCall(function_call_libfunc) = libfunc {
let func_id = function_call_libfunc.function.id.to_string();
let func_entry = stats.sierra_func_stats.get_mut(&func_id).unwrap();
func_entry.times_used += 1;
}
}
GenStatement::Return(_) => continue,
}
}
}
Expand Down
99 changes: 98 additions & 1 deletion src/statistics.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use cairo_lang_sierra::extensions::circuit::{CircuitInfo, GateOffsets};
use serde::Serialize;
use std::collections::BTreeMap;

use serde::Serialize;
use crate::{error::Result, native_panic};

/// A set of compilation statistics gathered during the compilation.
/// It should be completely filled at the end of the compilation.
Expand All @@ -14,8 +16,14 @@ pub struct Statistics {
pub sierra_statement_count: Option<usize>,
/// Number of user functions defined in the Sierra code.
pub sierra_func_count: Option<usize>,
/// Stats of the declared types in Sierra.
pub sierra_declared_types_stats: BTreeMap<String, SierraDeclaredTypeStats>,
/// Stats about params and return types of each Sierra function.
pub sierra_func_stats: BTreeMap<String, SierraFuncStats>,
/// Number of statements for each distinct libfunc.
pub sierra_libfunc_frequency: BTreeMap<String, u128>,
/// Number of times each circuit gate is used.
sierra_circuit_gates_count: CircuitGatesStats,
Comment on lines +25 to +26
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this field be public?

/// Number of MLIR operations generated.
pub mlir_operation_count: Option<u128>,
/// Number of MLIR operations generated for each distinct libfunc.
Expand Down Expand Up @@ -44,6 +52,37 @@ pub struct Statistics {
pub object_size_bytes: Option<usize>,
}

/// Contains the following stats about a Sierra function:
/// - params_total_size: Total size of all the params
/// - return_types_total_size: Total size of all the return types
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move each bullet point to the appropriate field doc comment. Likewise for the other struct.

Something like:

/// Contains the following stats about a Sierra function:
pub struct SierraFuncStats {
    /// Total size of all the params
    pub params_total_size: usize,
    /// Total size of all the return types
    pub return_types_total_size: usize,
    pub times_used: usize,
}

#[derive(Debug, Default, Serialize)]
pub struct SierraFuncStats {
/// Total size of all the parameter types in the function signature.
pub params_total_size: usize,
/// Total size of all the return types in the function signature.
pub return_types_total_size: usize,
/// Number of function-call invocation statements in the program that
/// target this function.
pub times_used: usize,
}

/// Contains the stats for each Sierra declared type.
#[derive(Debug, Default, Serialize)]
pub struct SierraDeclaredTypeStats {
/// Name of the concrete type (e.g. a struct), as produced by `type_to_name`.
pub concrete_type: String,
/// Layout size of the whole type.
pub size: usize,
/// Number of times the type is used as a param in a libfunc invocation.
pub as_param_count: usize,
}

/// Contains the quantity of each circuit gate
/// in a program
#[derive(Debug, Default, Serialize)]
struct CircuitGatesStats {
/// Gates taken from the add offsets whose `lhs` and `rhs` were known and
/// whose `output` was not (counted as ADD).
add_gate_count: usize,
/// Gates taken from the add offsets whose `rhs` and `output` were known and
/// whose `lhs` was not (counted as SUB).
sub_gate_count: usize,
/// Gates taken from the mul offsets whose `lhs` and `rhs` were known and
/// whose `output` was not (counted as MUL).
mul_gate_count: usize,
/// Gates taken from the mul offsets whose `rhs` and `output` were known and
/// whose `lhs` was not (counted as INVERSE).
inverse_gate_count: usize,
}

impl Statistics {
pub fn validate(&self) -> bool {
self.sierra_type_count.is_some()
Expand All @@ -62,6 +101,64 @@ impl Statistics {
&& self.compilation_linking_time_ms.is_some()
&& self.object_size_bytes.is_some()
}

/// Tallies the ADD, SUB, MUL and INVERSE gates of the circuit described by
/// `info` into `self.sierra_circuit_gates_count`.
///
/// It follows the same walk that is used to evaluate a circuit, but instead
/// of computing values it only tracks which slots are already known and
/// classifies each gate by which of its operands are known.
pub fn add_circuit_gates(&mut self, info: &CircuitInfo) -> Result<()> {
    // Slot 0 and the `n_inputs` slots right after it start out known; the
    // remaining `values.len()` slots become known as gates are resolved.
    let preset_len = 1 + info.n_inputs;
    let mut resolved = vec![false; preset_len + info.values.len()];
    resolved[..preset_len].fill(true);

    let mut pending_adds = info.add_offsets.iter().peekable();
    let mut pending_muls = info.mul_offsets.iter();

    loop {
        // Consume add-offset gates for as long as the next one is solvable.
        while let Some(&gate) = pending_adds.peek() {
            match (resolved[gate.lhs], resolved[gate.rhs], resolved[gate.output]) {
                // Both operands known, result unknown: ADD.
                (true, true, false) => {
                    self.sierra_circuit_gates_count.add_gate_count += 1;
                    resolved[gate.output] = true;
                }
                // Right operand and result known, left operand unknown: SUB.
                (false, true, true) => {
                    self.sierra_circuit_gates_count.sub_gate_count += 1;
                    resolved[gate.lhs] = true;
                }
                // Not solvable yet; go handle a mul-offset gate first.
                _ => break,
            }
            pending_adds.next();
        }

        // Then take exactly one mul-offset gate, or stop when none are left.
        match pending_muls.next() {
            None => break,
            Some(&GateOffsets { lhs, rhs, output }) => {
                match (resolved[lhs], resolved[rhs], resolved[output]) {
                    // Both operands known, result unknown: MUL.
                    (true, true, false) => {
                        self.sierra_circuit_gates_count.mul_gate_count += 1;
                        resolved[output] = true;
                    }
                    // Right operand and result known, left operand unknown: INVERSE.
                    (false, true, true) => {
                        self.sierra_circuit_gates_count.inverse_gate_count += 1;
                        resolved[lhs] = true;
                    }
                    // Not expected to be reachable: such a circuit should
                    // already have failed earlier, during compilation.
                    _ => native_panic!("Imposible circuit"),
                }
            }
        }
    }

    Ok(())
}
}

/// Clones a variable of type `Option<&mut T>` without consuming self
Expand Down
Loading
Loading