From 8965375dec2998907c3cab099c54f3cf13b7d179 Mon Sep 17 00:00:00 2001 From: zhangc <2023024681@m.scnu.edu.cn> Date: Sun, 29 Jun 2025 14:39:21 +0800 Subject: [PATCH 1/4] Refactored bin index calculation in the _single_frame function of InterRDF_s to optimize redundant np.histogram calls. Additionally, divided by N in _conclude to make the computation consistent with InterRDF. --- package/MDAnalysis/analysis/rdf.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index c3459ac65f..04af7f04e6 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -633,6 +633,7 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.volume_cum = 0 + self._minrange = self.rdf_settings["range"][0] if self.rdf_settings["range"][0] > 0 else 0.0 self._maxrange = self.rdf_settings["range"][1] def _single_frame(self): @@ -641,13 +642,24 @@ def _single_frame(self): ag1.positions, ag2.positions, self._maxrange, + self._minrange, box=self._ts.dimensions, backend=self.backend, ) + # Different people write code for different purposes. For my needs, the following two lines are sufficient: + # count, _ = np.histogram(dist, **self.rdf_settings) + # self.results.count[i][0, 0, :] += count + + # The following is an optimized version based on the old logic. + bins = self.rdf_settings["bins"] + minv, maxv = self._minrange, self._maxrange + # Calculate bin indices for each value in dist, similar to np.histogram's bin assignment. + bin_indices = ((dist - minv) * bins / (maxv - minv)).astype(np.int64) + for j, (idx1, idx2) in enumerate(pairs): - count, _ = np.histogram(dist[j], **self.rdf_settings) - self.results.count[i][idx1, idx2, :] += count + self.results.count[i][idx1, idx2, bin_indices[j]] += 1 + self.results.count[i][0, 0, bin_indices[j]] += 1 if self.norm == "rdf": self.volume_cum += self._ts.volume @@ -669,8 +681,10 @@ def _conclude(self): for i, (ag1, ag2) in enumerate(self.ags): # Number of each selection + if self.norm == "rdf": + N = len(ag1) * len(ag2) self.results.indices.append([ag1.indices, ag2.indices]) - self.results.rdf.append(self.results.count[i] / norm) + self.results.rdf.append(self.results.count[i] / (norm * N)) # Modify to make it consistent with InterRDF def get_cdf(self): r"""Calculate the cumulative counts for all sites. From 2760210910d5938d7aebed396e063c5b97d542a8 Mon Sep 17 00:00:00 2001 From: zhangc <2023024681@m.scnu.edu.cn> Date: Tue, 1 Jul 2025 23:05:36 +0800 Subject: [PATCH 2/4] Adjust the code to ensure the test passes. --- package/MDAnalysis/analysis/rdf.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 04af7f04e6..880e5d1afd 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -633,7 +633,11 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.volume_cum = 0 - self._minrange = self.rdf_settings["range"][0] if self.rdf_settings["range"][0] > 0 else 0.0 + self._minrange = ( + self.rdf_settings["range"][0] + if self.rdf_settings["range"][0] > 0 + else 0.0 + ) self._maxrange = self.rdf_settings["range"][1] def _single_frame(self): @@ -655,11 +659,12 @@ def _single_frame(self): bins = self.rdf_settings["bins"] minv, maxv = self._minrange, self._maxrange # Calculate bin indices for each value in dist, similar to np.histogram's bin assignment. - bin_indices = ((dist - minv) * bins / (maxv - minv)).astype(np.int64) + bin_indices = (dist - minv) * bins / (maxv - minv) + bin_indices = bin_indices.astype(np.int64) for j, (idx1, idx2) in enumerate(pairs): self.results.count[i][idx1, idx2, bin_indices[j]] += 1 - self.results.count[i][0, 0, bin_indices[j]] += 1 + # self.results.count[i][0, 0, bin_indices[j]] += 1 # this is necessary to calc rdf if self.norm == "rdf": self.volume_cum += self._ts.volume @@ -681,10 +686,8 @@ def _conclude(self): for i, (ag1, ag2) in enumerate(self.ags): # Number of each selection - if self.norm == "rdf": - N = len(ag1) * len(ag2) self.results.indices.append([ag1.indices, ag2.indices]) - self.results.rdf.append(self.results.count[i] / (norm * N)) # Modify to make it consistent with InterRDF + self.results.rdf.append(self.results.count[i] / norm) def get_cdf(self): r"""Calculate the cumulative counts for all sites. From 375ecc9e278c1433df287103f8c197f7e68339bc Mon Sep 17 00:00:00 2001 From: zhangc <2023024681@m.scnu.edu.cn> Date: Thu, 3 Jul 2025 16:17:36 +0800 Subject: [PATCH 3/4] Fewer revisions --- package/MDAnalysis/analysis/rdf.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 880e5d1afd..0137437bed 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -633,11 +633,6 @@ def _prepare(self): if self.norm == "rdf": # Cumulative volume for rdf normalization self.volume_cum = 0 - self._minrange = ( - self.rdf_settings["range"][0] - if self.rdf_settings["range"][0] > 0 - else 0.0 - ) self._maxrange = self.rdf_settings["range"][1] def _single_frame(self): @@ -646,7 +641,6 @@ def _single_frame(self): ag1.positions, ag2.positions, self._maxrange, - self._minrange, box=self._ts.dimensions, backend=self.backend, ) @@ -657,12 +651,14 @@ def _single_frame(self): # The following is an optimized version based on the old logic. bins = self.rdf_settings["bins"] - minv, maxv = self._minrange, self._maxrange + minv, maxv = self.rdf_settings["range"][0], self.rdf_settings["range"][1] # Calculate bin indices for each value in dist, similar to np.histogram's bin assignment. bin_indices = (dist - minv) * bins / (maxv - minv) bin_indices = bin_indices.astype(np.int64) for j, (idx1, idx2) in enumerate(pairs): + if bin_indices[j] < 0 or bin_indices[j] >= bins: + continue self.results.count[i][idx1, idx2, bin_indices[j]] += 1 # self.results.count[i][0, 0, bin_indices[j]] += 1 # this is necessary to calc rdf From fa271c2079724a99625247731aab55f220b2ecc5 Mon Sep 17 00:00:00 2001 From: zhangc <2023024681@m.scnu.edu.cn> Date: Thu, 3 Jul 2025 17:04:26 +0800 Subject: [PATCH 4/4] formt revisions --- package/MDAnalysis/analysis/rdf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py index 0137437bed..3e9232a5b0 100644 --- a/package/MDAnalysis/analysis/rdf.py +++ b/package/MDAnalysis/analysis/rdf.py @@ -651,15 +651,17 @@ def _single_frame(self): # The following is an optimized version based on the old logic. bins = self.rdf_settings["bins"] - minv, maxv = self.rdf_settings["range"][0], self.rdf_settings["range"][1] + minv, maxv = ( + self.rdf_settings["range"][0], + self.rdf_settings["range"][1], + ) # Calculate bin indices for each value in dist, similar to np.histogram's bin assignment. bin_indices = (dist - minv) * bins / (maxv - minv) bin_indices = bin_indices.astype(np.int64) for j, (idx1, idx2) in enumerate(pairs): - if bin_indices[j] < 0 or bin_indices[j] >= bins: - continue - self.results.count[i][idx1, idx2, bin_indices[j]] += 1 + if 0 <= bin_indices[j] < bins: + self.results.count[i][idx1, idx2, bin_indices[j]] += 1 # self.results.count[i][0, 0, bin_indices[j]] += 1 # this is necessary to calc rdf if self.norm == "rdf":