Skip to content

Add in-source Tracy profiler #1284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
b673ced
Add Tracy to CMakeLists
foxtran Mar 30, 2025
4292c77
Enable Tracy profiler for whole app's lifetime
foxtran Mar 30, 2025
c894af0
Set thread name
foxtran Mar 31, 2025
07df91d
Profile main with zones
foxtran Mar 31, 2025
19eb2da
Count start-up time with zone
foxtran Mar 31, 2025
4da9d76
Add profiler for geoopt_driver
foxtran Mar 31, 2025
30a6630
Profile ANCOPT microiters
foxtran Mar 31, 2025
43329ad
Profile relax procedure
foxtran Mar 31, 2025
1229493
Profile xtb/calculator
foxtran Mar 31, 2025
a5f0d2a
Profile build_SDQH0
foxtran Mar 31, 2025
ac09e2d
Profile build_dSDQH0[_noreset]
foxtran Mar 31, 2025
dd32dd6
Profile scf_module
foxtran Mar 31, 2025
20edb48
Profile scc
foxtran Mar 31, 2025
b5c91cd
Profile solve and dmat
foxtran Mar 31, 2025
71f498b
Profile pseudodiag
foxtran Mar 31, 2025
2f91e43
Reduce number of allocations
foxtran Mar 31, 2025
f76ff2e
Profile factorized overlap in scc_core
foxtran Mar 31, 2025
9389b1f
Profile eigensolver
foxtran Mar 31, 2025
f89cd06
Profile Wiberg, fermismear, buildIso, buildIsoAniso, build_h0
foxtran Apr 4, 2025
e48fda6
Profile aespot.F90
foxtran Apr 4, 2025
b87e965
Profile qsh2qat and mpopsh
foxtran Apr 4, 2025
79b9e78
Profile grad
foxtran Apr 4, 2025
e30730d
Profile dftd4.F90
foxtran Apr 4, 2025
fb7c251
Profile model_hessian
foxtran Apr 18, 2025
c264ff7
Add tracying
foxtran May 19, 2025
24ea6d0
Add zone profiler
foxtran May 19, 2025
15a8601
Add Tracy colors for non-Tracy builds
foxtran May 19, 2025
a8d6813
Use xtb_tracying in aespot.F90
foxtran May 19, 2025
9c893f3
Use xtb_tracying in geoopt_driver
foxtran May 19, 2025
4bb39cf
Use xtb_tracying in disp/dftd4
foxtran May 19, 2025
86c4ced
Use xtb_tracying in prog/
foxtran May 19, 2025
58e5cbc
Use xtb_tracying in xtb/
foxtran May 19, 2025
8b890cd
Use xtb_tracying in mctc/lapack/eigensolve
foxtran May 19, 2025
052ae5e
Use xtb_tracying in model_hessian
foxtran May 19, 2025
ead59fe
Use xtb_tracying in scf_module
foxtran May 19, 2025
edcea88
Use xtb_tracying in pseudodiag
foxtran May 19, 2025
33d81ec
Add frame support
foxtran May 19, 2025
dde104d
Use xtb_tracying in optimizer
foxtran May 19, 2025
d7b28e9
Use xtb_tracying in scc_core
foxtran May 19, 2025
f78a51f
Rename xtb_zone_context -> xtb_zone
foxtran May 19, 2025
d62b8ac
Use zone instead of ctx
foxtran May 19, 2025
fd321e3
Avoid extra calls if tracy is disabled
foxtran May 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
cmake_minimum_required(VERSION 3.17)
# User-facing build switches; each can be overridden with -D<NAME>=<value>.
option(WITH_OBJECT "To build using object library" TRUE)
option(INSTALL_MODULES "Install Fortran module files to include directory." FALSE)
# Profiling is opt-in: Tracy is only enabled when this is switched on.
option(WITH_TRACY "Add low-latency runtime profiler" FALSE)

# Buggy CMake versions
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.27.0 AND CMAKE_VERSION VERSION_LESS 3.28.0)
Expand All @@ -29,6 +30,9 @@ project(
VERSION "6.7.1"
LANGUAGES "C" "Fortran"
)
# The project itself only declares C and Fortran; C++ is enabled on demand
# for profiler builds (WITH_TRACY) so default builds need no C++ compiler.
if(WITH_TRACY)
enable_language(CXX)
endif()

enable_testing()

Expand Down Expand Up @@ -58,6 +62,25 @@ if(NOT TARGET "test-drive::test-drive")
find_package("test-drive" REQUIRED)
endif()

# Optional Tracy profiler: download the Tracy sources, add them to the build
# and define WITH_TRACY for the targets created in this directory.
if(WITH_TRACY)
  # Safe to include repeatedly; guarantees FetchContent_* are available here.
  include(FetchContent)

  # Tracy build switches. They are put into the cache before the Tracy
  # sources are added, so Tracy's own defaults for them are not used.
  option(TRACY_Fortran "Build Tracy Fortran bindings" ON)
  option(TRACY_DELAYED_INIT "Tracy: enable delayed initialization of the library" ON)
  option(TRACY_MANUAL_LIFETIME "Tracy: enable manual lifetime management of the profiler" ON)
  option(TRACY_NO_FRAME_IMAGE "Tracy: disable the frame image support" ON)

  # Revision of Tracy to fetch. The default follows the moving `master`
  # branch (previous behaviour); pass -DXTB_TRACY_GIT_TAG=<branch-or-tag>
  # to pin a fixed revision and get reproducible builds.
  # NOTE: GIT_SHALLOW below expects a branch or tag name, not a commit hash.
  set(XTB_TRACY_GIT_TAG "master" CACHE STRING "Git branch or tag of Tracy to fetch")

  FetchContent_Declare(
    tracy
    GIT_REPOSITORY https://github.com/wolfpld/tracy.git
    GIT_TAG "${XTB_TRACY_GIT_TAG}"
    GIT_SHALLOW TRUE
    GIT_PROGRESS TRUE
  )

  FetchContent_MakeAvailable(tracy)

  # Directory-scoped on purpose: it is issued after Tracy's subdirectory was
  # added, so it reaches this project's targets but not Tracy's own.
  add_compile_definitions(WITH_TRACY)
endif()

# Sources: initialize program sources (prog) and library sources (srcs) empty
set(prog)
set(srcs)
Expand Down Expand Up @@ -107,6 +130,8 @@ if(WITH_OBJECT)
"mctc-lib::mctc-lib"
"$<$<BOOL:${WITH_CPCMX}>:cpcmx::cpcmx>"
"$<$<BOOL:${WITH_TBLITE}>:tblite::tblite>"
"$<$<BOOL:${WITH_TRACY}>:TracyClient>"
"$<$<BOOL:${WITH_TRACY}>:TracyClientF90>"
"$<$<BOOL:${WITH_OpenMP}>:OpenMP::OpenMP_Fortran>"
)

Expand Down Expand Up @@ -149,7 +174,9 @@ target_link_libraries(
"$<$<BOOL:${WITH_OpenMP}>:OpenMP::OpenMP_Fortran>"
"mctc-lib::mctc-lib"
"$<$<BOOL:${WITH_CPCMX}>:cpcmx::cpcmx>"
$<$<BOOL:${WITH_TBLITE}>:tblite::tblite>
"$<$<BOOL:${WITH_TBLITE}>:tblite::tblite>"
"$<$<BOOL:${WITH_TRACY}>:TracyClient>"
"$<$<BOOL:${WITH_TRACY}>:TracyClientF90>"
)

set_target_properties(
Expand Down Expand Up @@ -190,6 +217,8 @@ if (WITH_OBJECT)
"mctc-lib::mctc-lib"
"$<$<BOOL:${WITH_CPCMX}>:cpcmx::cpcmx>"
"$<$<BOOL:${WITH_TBLITE}>:tblite::tblite>"
"$<$<BOOL:${WITH_TRACY}>:TracyClient>"
"$<$<BOOL:${WITH_TRACY}>:TracyClientF90>"
)

set_target_properties(
Expand Down Expand Up @@ -232,6 +261,7 @@ set_target_properties(
Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}
OUTPUT_NAME "${PROJECT_NAME}"
LINKER_LANGUAGE Fortran
)

target_include_directories(
Expand Down
11 changes: 6 additions & 5 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ list(APPEND srcs
"${dir}/foden.f90"
"${dir}/fragment.f90"
"${dir}/generate_wsc.f90"
"${dir}/geoopt_driver.f90"
"${dir}/geoopt_driver.F90"
"${dir}/geosum.f90"
"${dir}/getkopt.f90"
"${dir}/getname.f90"
Expand Down Expand Up @@ -108,11 +108,11 @@ list(APPEND srcs
"${dir}/mdoptim.f90"
"${dir}/metadynamic.f90"
"${dir}/modef.f90"
"${dir}/model_hessian.f90"
"${dir}/model_hessian.F90"
"${dir}/neighbor.f90"
"${dir}/onetri.f90"
"${dir}/oniom.f90"
"${dir}/optimizer.f90"
"${dir}/optimizer.F90"
"${dir}/pbc.f90"
"${dir}/pbc_tools.f90"
"${dir}/peeq_module.f90"
Expand All @@ -121,7 +121,7 @@ list(APPEND srcs
"${dir}/printmold.f90"
"${dir}/printout.f90"
"${dir}/prmat.f90"
"${dir}/pseudodiag.f90"
"${dir}/pseudodiag.F90"
"${dir}/qpot.f90"
"${dir}/qsort.f90"
"${dir}/rdcoord2.f90"
Expand All @@ -135,7 +135,7 @@ list(APPEND srcs
"${dir}/rmsd.f90"
"${dir}/scan_driver.f90"
"${dir}/scanparam.f90"
"${dir}/scc_core.f90"
"${dir}/scc_core.F90"
"${dir}/scf_module.F90"
"${dir}/screening.f90"
"${dir}/set_module.f90"
Expand All @@ -153,6 +153,7 @@ list(APPEND srcs
"${dir}/thermo.f90"
"${dir}/timing.f90"
"${dir}/topology.f90"
"${dir}/tracying.F90"
"${dir}/vertical.f90"
"${dir}/wrbas.f90"
"${dir}/wrgbw.f90"
Expand Down
49 changes: 48 additions & 1 deletion src/aespot.F90
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module xtb_aespot
! ndp,nqp : number of elements to be computed in Fock matrix with X-dip and X-qpole terms
! matdlst,matqlst : index list, to which AO, the ndp/nqp potential terms refer to
subroutine setdqlist(nao,ndp,nqp,thr,dpint,qpint,matdlst,matqlst)
use xtb_tracying
implicit none
integer, intent(in) :: nao
integer, intent(inout) :: ndp,nqp
Expand All @@ -46,6 +47,10 @@ subroutine setdqlist(nao,ndp,nqp,thr,dpint,qpint,matdlst,matqlst)

integer i,j,k,l,m,ii,jj,ll,kk,mq,md,ij

type(xtb_zone) :: zone

if (do_tracying) call zone%start("src/aespot.F90", "setdqlist", __LINE__, color=TracyColors%OliveDrab1)

! INFO: this threshold must be slightly larger than max(0,thr2),
! where thr2 is the one used in screening in routine aesdqint
thr2 = thr*1.0d-2 ! we compare squared int-elements
Expand Down Expand Up @@ -82,6 +87,7 @@ subroutine setdqlist(nao,ndp,nqp,thr,dpint,qpint,matdlst,matqlst)
enddo
ndp = md
nqp = mq

end subroutine setdqlist

! scalecamm: scale all anisotropic CAMMs by element-specific parameters
Expand Down Expand Up @@ -134,6 +140,7 @@ end subroutine unscalecamm
! dipm(3,nat) : cumulative atomic dipole moments (x,y,z)
! qp(6,nat) : traceless(!) cumulative atomic quadrupole moments (xx,xy,yy,xz,yz,zz)
subroutine mmompop(nat,nao,aoat2,xyz,p,s,dpint,qpint,dipm,qp)
use xtb_tracying
implicit none
integer, intent(in) :: nao,nat,aoat2(:)
real(wp), intent(in) :: s(:, :)
Expand All @@ -144,6 +151,9 @@ subroutine mmompop(nat,nao,aoat2,xyz,p,s,dpint,qpint,dipm,qp)
real(wp), intent(out):: dipm(:, :)
real(wp), intent(out):: qp(:, :)

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "mmompop", __LINE__, color=TracyColors%OliveDrab1)

#ifdef XTB_GPU
call mmompop_gpu(nat,nao,aoat2,xyz,p,s,dpint,qpint,dipm,qp)
#else
Expand Down Expand Up @@ -401,6 +411,7 @@ subroutine mmompop_cpu(nat,nao,aoat2,xyz,p,s,dpint,qpint,dipm,qp)
enddo

end subroutine mmompop_cpu

end subroutine mmompop


Expand All @@ -416,6 +427,7 @@ end subroutine mmompop
! e : E_AES
subroutine aniso_electro(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,e,epol)
use xtb_lin, only : lin
use xtb_tracying
implicit none
class(TMultipoleData), intent(in) :: aesData
integer, intent(in) :: nat,at(:)
Expand All @@ -428,6 +440,10 @@ subroutine aniso_electro(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,e,epol)
real(wp), intent(in) :: gab3(:,:),gab5(:,:)
real(wp), intent(in) :: dipm(:,:),qp(:,:)

type(xtb_zone) :: zone

if (do_tracying) call zone%start("src/aespot.F90", "aniso_electro", __LINE__, color=TracyColors%OliveDrab1)

#ifdef XTB_GPU
call aniso_electro_gpu(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,e,epol)
#else
Expand Down Expand Up @@ -541,7 +557,6 @@ subroutine aniso_electro_gpu(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,e,epol)
! acc& at, xyz, q, dipm, qp, gab3, gab5)
end subroutine aniso_electro_gpu


subroutine aniso_electro_cpu(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,e,epol)

implicit none
Expand Down Expand Up @@ -651,6 +666,7 @@ end subroutine aniso_electro
! vq(6,nat) : quadrupole proportional potential
subroutine fockelectro(nat,nao,aoat2,p,s,dpint,qpint,vs,vd,vq,e)
use xtb_lin, only : lin
use xtb_tracying
implicit none
integer, intent(in) :: nat,nao,aoat2(nao)
real(wp), intent(in) :: dpint(3,nao,nao),s(nao,nao)
Expand All @@ -659,6 +675,11 @@ subroutine fockelectro(nat,nao,aoat2,p,s,dpint,qpint,vs,vd,vq,e)
real(wp), intent(out) :: e
real(wp) eaes,pji,fji
integer i,j,k,l,ii,jj,ij,kl,kj

type(xtb_zone) :: zone

if (do_tracying) call zone%start("src/aespot.F90", "fockelectro", __LINE__, color=TracyColors%OliveDrab1)

! CAMM
eaes = 0.0_wp
ij = 0
Expand All @@ -682,6 +703,7 @@ subroutine fockelectro(nat,nao,aoat2,p,s,dpint,qpint,vs,vd,vq,e)
eaes = 0.250_wp*eaes
! write(*,*) 'EAES',eaes
e = eaes

end subroutine fockelectro


Expand All @@ -702,6 +724,7 @@ end subroutine fockelectro
! vq(6,nat) : qpole-int proportional potential from all atoms acting on atom i
subroutine setvsdq(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,vs,vd,vq)
use xtb_lin, only : lin
use xtb_tracying
implicit none
class(TMultipoleData), intent(in) :: aesData
integer, intent(in) :: nat,at(:)
Expand All @@ -714,6 +737,10 @@ subroutine setvsdq(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,vs,vd,vq)
real(wp) r2ab,t1b,t2b,t3b,t4b,dum3b,dum5b,dtmp(3),qtmp(6),g3,g5
real(wp) qs1,qs2
integer i,j,k,l1,l2,ll,m,mx,ki,kj

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "setvsdq", __LINE__, color=TracyColors%OliveDrab1)

vs = 0.0_wp
vd = 0.0_wp
vq = 0.0_wp
Expand Down Expand Up @@ -836,6 +863,7 @@ end subroutine setvsdq
! vq(6,nat) : qpole-int proportional potential from all atoms acting on atom i
subroutine setdvsdq(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,vs,vd,vq)
use xtb_lin, only : lin
use xtb_tracying
implicit none
class(TMultipoleData), intent(in) :: aesData
integer, intent(in) :: nat,at(:)
Expand All @@ -848,6 +876,10 @@ subroutine setdvsdq(aesData,nat,at,xyz,q,dipm,qp,gab3,gab5,vs,vd,vq)
real(wp) r2ab,t1b,t2b,t3b,t4b,dum3b,dum5b,dtmp(3),qtmp(6),g3,g5
real(wp) qs1,qs2
integer i,j,k,l1,l2,ll,m,mx,ki,kj

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "setdvsdq", __LINE__, color=TracyColors%OliveDrab1)

vs = 0.0_wp
vd = 0.0_wp
vq = 0.0_wp
Expand Down Expand Up @@ -936,13 +968,18 @@ end subroutine setdvsdq
subroutine molmom(iunit,n,xyz,q,dipm,qp,dip,d3)
use xtb_mctc_convert
use xtb_lin, only : lin
use xtb_tracying
implicit none
integer, intent(in) :: iunit
integer, intent(in) :: n
real(wp), intent(in) :: xyz(:,:),q(:),dipm(:,:),qp(:,:)
real(wp), intent(out) :: dip,d3(:)
real(wp) rr1(3),rr2(3),tma(6),tmb(6),tmc(6),dum
integer i,j,k,l

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "molmom", __LINE__, color=TracyColors%OliveDrab1)

rr1 = 0.0_wp
rr2 = 0.0_wp
write(iunit,'(a)')
Expand Down Expand Up @@ -1046,6 +1083,7 @@ end subroutine molqdip
subroutine aniso_grad(nat,at,xyz,q,dipm,qp,kdmp3,kdmp5, &
& radcn,dcn,gab3,gab5,g)
use xtb_lin, only : lin
use xtb_tracying
!gab3 Hellmann-Feynman terms correct, shift terms to be tested yet
implicit none
integer, intent(in) :: nat,at(:)
Expand All @@ -1059,6 +1097,10 @@ subroutine aniso_grad(nat,at,xyz,q,dipm,qp,kdmp3,kdmp5, &
real(wp) dgab3,dgab5,damp1,damp2,ddamp,qs2

integer i,j,k,l,m,ki,kj,kl

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "aniso_grad", __LINE__, color=TracyColors%OliveDrab1)

do i = 1,nat
q1 = q(i)
rr(1:3) = xyz(1:3,i)
Expand Down Expand Up @@ -1138,6 +1180,7 @@ subroutine aniso_grad(nat,at,xyz,q,dipm,qp,kdmp3,kdmp5, &
g(:,:) = g-tmp2*dcn(:,:,i)

enddo

end subroutine aniso_grad


Expand Down Expand Up @@ -1193,6 +1236,7 @@ end subroutine checkspars

! zero-damped gab
subroutine mmomgabzero(nat,at,xyz,kdmp3,kdmp5,radcn,gab3,gab5)
use xtb_tracying
implicit none
integer, intent(in) :: nat,at(:)
real(wp), intent(in) :: xyz(:,:),radcn(:)
Expand All @@ -1203,6 +1247,9 @@ subroutine mmomgabzero(nat,at,xyz,kdmp3,kdmp5,radcn,gab3,gab5)
real(wp) tmp1,tmp2,rr(3)
integer i,j,k,l,lin

type(xtb_zone) :: zone
if (do_tracying) call zone%start("src/aespot.F90", "mmomgabzero", __LINE__, color=TracyColors%OliveDrab1)

!!!!!!! set up damped Coulomb operators for multipole interactions
gab3 = 0.0_wp ! for r**-2 decaying q-dip term
gab5 = 0.0_wp ! for r**-3 decaying terms (q-qpol,dip-dip)
Expand Down
Loading
Loading