Skip to content

Commit fcacc68

Browse files
committed
restrict
1 parent 04a01ad commit fcacc68

File tree

1 file changed

+51
-58
lines changed

1 file changed

+51
-58
lines changed

org/qmckl_jastrow_champ_single.org

Lines changed: 51 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3352,8 +3352,8 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_delta_p_gl_doc( &
33523352

33533353
do nw=1, walk_num
33543354
do m=1, cord_num-1
3355-
do j = 1, elec_num
3356-
do k = 1, 4
3355+
do k = 1, 4
3356+
do j = 1, elec_num
33573357
delta_e_gl(j,k) = een_rescaled_single_e_gl(k,j,m,nw) - een_rescaled_e_gl(num, k, j, m, nw)
33583358
end do
33593359
end do
@@ -3371,7 +3371,7 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_delta_p_gl_doc( &
33713371
cummu = 0.0d0
33723372
do i = 1, elec_num
33733373

3374-
delta_p_gl(i,a,k,l,m,nw) = -een_rescaled_e_gl(i,k,num,m,nw) * een_re_n&
3374+
delta_p_gl(i,a,k,l,m,nw) = -een_rescaled_e_gl(i,k,num,m,nw) * een_re_n &
33753375
- een_rescaled_single_e_gl(k,i,m,nw) * een_re_single_n
33763376

33773377
cummu = cummu + delta_e_gl(i,k) * een_rescaled_n(i,a,l,nw)
@@ -3790,27 +3790,12 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_factor_single_een_
37903790
do a = 1, nucl_num
37913791
cn = c_vector_full(a, n)
37923792
if(cn == 0.d0) cycle
3793-
!do i = 1, elec_num
3794-
! delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + ( &
3795-
! delta_p_gl(i,a,kk,m ,k,nw) * een_rescaled_n(i,a,m+l,nw) + &
3796-
! delta_p_gl(i,a,kk,m+l,k,nw) * een_rescaled_n(i,a,m ,nw) + &
3797-
! delta_p(i,a,m ,k,nw) * een_rescaled_n_gl(i,kk,a,m+l,nw) + &
3798-
! delta_p(i,a,m+l,k,nw) * een_rescaled_n_gl(i,kk,a,m ,nw) ) * cn
3799-
!end do
38003793
do i = 1, elec_num
3801-
! Cache repeated accesses
3802-
dpg1_m = delta_p_gl(i,a,kk,m ,k,nw)
3803-
dpg1_ml = delta_p_gl(i,a,kk,m+l,k,nw)
3804-
dp_m = delta_p(i,a,m ,k,nw)
3805-
dp_ml = delta_p(i,a,m+l,k,nw)
3806-
3807-
een_r_m = een_rescaled_n(i,a,m ,nw)
3808-
een_r_ml = een_rescaled_n(i,a,m+l,nw)
3809-
een_r_gl_m = een_rescaled_n_gl(i,kk,a,m ,nw)
3810-
een_r_gl_ml = een_rescaled_n_gl(i,kk,a,m+l,nw)
3811-
3812-
delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + cn * &
3813-
(dpg1_m * een_r_ml + dpg1_ml * een_r_m + dp_m * een_r_gl_ml + dp_ml * een_r_gl_m)
3794+
delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + ( &
3795+
delta_p_gl(i,a,kk,m ,k,nw) * een_rescaled_n(i,a,m+l,nw) + &
3796+
delta_p_gl(i,a,kk,m+l,k,nw) * een_rescaled_n(i,a,m ,nw) + &
3797+
delta_p(i,a,m ,k,nw) * een_rescaled_n_gl(i,kk,a,m+l,nw) + &
3798+
delta_p(i,a,m+l,k,nw) * een_rescaled_n_gl(i,kk,a,m ,nw) ) * cn
38143799
end do
38153800

38163801
delta_een_gl(num,kk,nw) = delta_een_gl(num,kk,nw) + ( &
@@ -3856,17 +3841,17 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
38563841
const int64_t nucl_num,
38573842
const int64_t cord_num,
38583843
const int64_t dim_c_vector,
3859-
const double* c_vector_full,
3860-
const int64_t* lkpm_combined_index,
3861-
const double* tmp_c,
3862-
const double* dtmp_c,
3863-
const double* delta_p,
3864-
const double* delta_p_gl,
3865-
const double* een_rescaled_n,
3866-
const double* een_rescaled_single_n,
3867-
const double* een_rescaled_n_gl,
3868-
const double* een_rescaled_single_n_gl,
3869-
double* const delta_een_gl )
3844+
const double* restrict c_vector_full,
3845+
const int64_t* restrict lkpm_combined_index,
3846+
const double* restrict tmp_c,
3847+
const double* restrict dtmp_c,
3848+
const double* restrict delta_p,
3849+
const double* restrict delta_p_gl,
3850+
const double* restrict een_rescaled_n,
3851+
const double* restrict een_rescaled_single_n,
3852+
const double* restrict een_rescaled_n_gl,
3853+
const double* restrict een_rescaled_single_n_gl,
3854+
double* restrict const delta_een_gl )
38703855
{
38713856

38723857

@@ -3884,7 +3869,9 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
38843869
return QMCKL_SUCCESS;
38853870
}
38863871

3872+
#ifdef HAVE_OPENMP
38873873
#pragma omp parallel for
3874+
#endif
38883875
for (int64_t nw=0 ; nw<walk_num ; nw++) {
38893876
for (size_t i=0 ; i<4*elec_num ; ++i) {
38903877
delta_een_gl[i+nw*4*elec_num] = 0.;
@@ -3905,21 +3892,24 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
39053892
const int64_t m = lkpm_combined_index[n+3*dim_c_vector];
39063893

39073894
for (int64_t kk=0 ; kk<4 ; ++kk) {
3908-
double* dgl = &delta_een_gl[elec_num*(kk+4*nw)];
3895+
double* restrict dgl = &delta_een_gl[elec_num*(kk+4*nw)];
39093896

39103897
for (int64_t a=0 ; a<nucl_num ; ++a) {
39113898
const double cn = c_vector_full[a+n*nucl_num];
39123899
if (cn == 0.) continue;
39133900

3914-
const double* dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3915-
const double* dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3916-
const double* dp_m = &delta_p[elec_num*(a+nucl_num*(m+(cord_num+1)*(k+cord_num*nw)))];
3917-
const double* dp_ml = &delta_p[elec_num*(a+nucl_num*(m+l+(cord_num+1)*(k+cord_num*nw)))];
3918-
const double* een_r_m = &een_rescaled_n[elec_num*(a+nucl_num*(m+(cord_num+1)*nw))];
3919-
const double* een_r_ml = &een_rescaled_n[elec_num*(a+nucl_num*(m+l+(cord_num+1)*nw))];
3920-
const double* een_r_gl_m = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3921-
const double* een_r_gl_ml = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3922-
3901+
const double* restrict dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3902+
const double* restrict dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3903+
const double* restrict dp_m = &delta_p[elec_num*(a+nucl_num*(m+(cord_num+1)*(k+cord_num*nw)))];
3904+
const double* restrict dp_ml = &delta_p[elec_num*(a+nucl_num*(m+l+(cord_num+1)*(k+cord_num*nw)))];
3905+
const double* restrict een_r_m = &een_rescaled_n[elec_num*(a+nucl_num*(m+(cord_num+1)*nw))];
3906+
const double* restrict een_r_ml = &een_rescaled_n[elec_num*(a+nucl_num*(m+l+(cord_num+1)*nw))];
3907+
const double* restrict een_r_gl_m = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3908+
const double* restrict een_r_gl_ml = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3909+
3910+
#ifdef HAVE_OPENMP
3911+
#pragma omp simd
3912+
#endif
39233913
for (int64_t i=0 ; i<elec_num ; ++i) {
39243914
dgl[i] += cn * (dpg1_m[i] * een_r_ml[i] + dpg1_ml[i] * een_r_m[i] +
39253915
dp_m[i] * een_r_gl_ml[i] + dp_ml[i] * een_r_gl_m[i]);
@@ -3944,21 +3934,24 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
39443934
const double cn = 2. * c_vector_full[a+n*nucl_num];
39453935
if (cn == 0.) continue;
39463936

3947-
double* dgl4 = &delta_een_gl[elec_num*(3+4*nw)];
3937+
double* restrict dgl4 = &delta_een_gl[elec_num*(3+4*nw)];
39483938

3949-
const double* dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3950-
const double* dpg2_m = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3951-
const double* dpg3_m = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3952-
const double* dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3953-
const double* dpg2_ml = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3954-
const double* dpg3_ml = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3955-
const double* een_r_gl1_m = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3956-
const double* een_r_gl2_m = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3957-
const double* een_r_gl3_m = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3958-
const double* een_r_gl1_ml = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3959-
const double* een_r_gl2_ml = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3960-
const double* een_r_gl3_ml = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3961-
3939+
const double* restrict dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3940+
const double* restrict dpg2_m = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3941+
const double* restrict dpg3_m = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+(cord_num+1)*(k+cord_num*nw))))];
3942+
const double* restrict dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3943+
const double* restrict dpg2_ml = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3944+
const double* restrict dpg3_ml = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
3945+
const double* restrict een_r_gl1_m = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3946+
const double* restrict een_r_gl2_m = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3947+
const double* restrict een_r_gl3_m = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
3948+
const double* restrict een_r_gl1_ml = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3949+
const double* restrict een_r_gl2_ml = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3950+
const double* restrict een_r_gl3_ml = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
3951+
3952+
#ifdef HAVE_OPENMP
3953+
#pragma omp simd
3954+
#endif
39623955
for (int64_t i=0 ; i<elec_num ; ++i) {
39633956
dgl4[i] += (dpg1_m[i] * een_r_gl1_ml[i] + dpg1_ml[i] * een_r_gl1_m[i] +
39643957
dpg2_m[i] * een_r_gl2_ml[i] + dpg2_ml[i] * een_r_gl2_m[i] +

0 commit comments

Comments
 (0)