From cbb958f494a288020c56f20789e6986c7140b1ad Mon Sep 17 00:00:00 2001 From: Abuzar Mahmood Date: Wed, 20 Nov 2024 14:35:02 -0500 Subject: [PATCH 1/4] Update .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index fd9b16ce..725375b4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ requirements/.DS_Store blech.dir *.swp *.Rhistory +.aider* +.env From dbf92612a577f6665e008b2c8b8905962f75dd08 Mon Sep 17 00:00:00 2001 From: "Abuzar Mahmood (aider)" Date: Wed, 20 Nov 2024 14:40:20 -0500 Subject: [PATCH 2/4] feat: Update generate_hash method to require electrode_number and waveform_count inputs --- utils/blech_post_process_utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/utils/blech_post_process_utils.py b/utils/blech_post_process_utils.py index 483efcf9..428bf331 100644 --- a/utils/blech_post_process_utils.py +++ b/utils/blech_post_process_utils.py @@ -623,11 +623,20 @@ def get_latest_unit_name(self,): return unit_name, max_unit - def generate_hash(self,): + def generate_hash(self, electrode_number, waveform_count): """ - Generate a 10 character hash for the unit + Generate a 10 character hash for the unit based on electrode and waveform count + + Args: + electrode_number: int, electrode number + waveform_count: int, number of waveforms in unit + + Returns: + str: 10 character hash """ - return str(uuid.uuid4()).split('-')[0] + # Create deterministic hash from inputs + hash_input = f"{electrode_number}_{waveform_count}" + return str(uuid.uuid5(uuid.NAMESPACE_DNS, hash_input)).split('-')[0] def save_unit( self, @@ -664,7 +673,7 @@ def save_unit( # Get a hash for the unit to compare stored data # with unit_descriptor table - unit_hash = self.generate_hash() + unit_hash = self.generate_hash(electrode_num, len(unit_times)) # Add to HDF5 waveforms = self.hf5.create_array('/sorted_units/%s' % unit_name, From ebfbf73b73736036dcd6f78426d5d3253d56bc22 Mon Sep 17 00:00:00 2001 From: "Abuzar Mahmood (aider)" Date: Wed, 20 Nov 2024 14:41:54 -0500 Subject: [PATCH 3/4] feat: Add hash-based duplicate unit detection in save_unit method --- utils/blech_post_process_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/blech_post_process_utils.py b/utils/blech_post_process_utils.py index 428bf331..e6944bf9 100644 --- a/utils/blech_post_process_utils.py +++ b/utils/blech_post_process_utils.py @@ -675,6 +675,15 @@ def save_unit( # with unit_descriptor table unit_hash = self.generate_hash(electrode_num, len(unit_times)) + # Check if unit with this hash already exists + existing_units = self.get_saved_units_hashes() + if unit_hash in existing_units['hash'].values: + existing_unit = existing_units[existing_units['hash'] == unit_hash].iloc[0] + print(f"Unit already exists as {existing_unit['unit_name']}") + return continue_bool, existing_unit['unit_name'] + + print(f"Adding new unit {unit_name}") + # Add to HDF5 waveforms = self.hf5.create_array('/sorted_units/%s' % unit_name, 'waveforms', unit_waveforms) From 98c2a7ee92eb84c62417593486d7241c3fe50c5c Mon Sep 17 00:00:00 2001 From: "Abuzar Mahmood (aider)" Date: Wed, 20 Nov 2024 15:24:57 -0500 Subject: [PATCH 4/4] refactor: Skip hash check for first unit in save_unit method --- utils/blech_post_process_utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/utils/blech_post_process_utils.py b/utils/blech_post_process_utils.py index e6944bf9..2ec8a34f 100644 --- a/utils/blech_post_process_utils.py +++ b/utils/blech_post_process_utils.py @@ -675,12 +675,13 @@ def save_unit( # with unit_descriptor table unit_hash = self.generate_hash(electrode_num, len(unit_times)) - # Check if unit with this hash already exists - existing_units = self.get_saved_units_hashes() - if unit_hash in existing_units['hash'].values: - existing_unit = existing_units[existing_units['hash'] == unit_hash].iloc[0] - print(f"Unit already exists as {existing_unit['unit_name']}") - return continue_bool, existing_unit['unit_name'] + # Only check for existing hash if this isn't the first unit + if max_unit >= 0: + existing_units = self.get_saved_units_hashes() + if unit_hash in existing_units['hash'].values: + existing_unit = existing_units[existing_units['hash'] == unit_hash].iloc[0] + print(f"Unit already exists as {existing_unit['unit_name']}") + return continue_bool, existing_unit['unit_name'] print(f"Adding new unit {unit_name}")