From 3f4e60b4992a85bfa314538022fd67dfa36360b9 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 23 Jan 2018 21:55:30 +0100 Subject: [PATCH 01/28] MAINT: prepare for release 3.1.4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 06db2fa..3a5e34d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from setuptools import setup, find_packages # This setup relies on setuptools since distutils is insufficient and badly hacked code -version = '3.1.3' +version = '3.1.4' author = 'David-Leon Pohl, Jens Janssen' author_email = 'pohl@physik.uni-bonn.de, janssen@physik.uni-bonn.de' From 4bbe0935a56bd8fe7859978920160d1911313cae Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Thu, 19 Sep 2019 13:38:53 +0200 Subject: [PATCH 02/28] PRJ: bump version to 3.1.5.dev0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3a5e34d..fcfaf05 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from setuptools import setup, find_packages # This setup relies on setuptools since distutils is insufficient and badly hacked code -version = '3.1.4' +version = '3.1.5.dev0' author = 'David-Leon Pohl, Jens Janssen' author_email = 'pohl@physik.uni-bonn.de, janssen@physik.uni-bonn.de' From 4b40c44770191333da1adeacbbe62c8733e18e22 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 26 Jun 2018 15:14:53 +0200 Subject: [PATCH 03/28] ENH: allow clustering with coordinates --- pixel_clusterizer/cluster_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index bf54c0b..ceefab5 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py @@ -13,8 +13,8 @@ def _new_event(event_number_1, event_number_2): def _pixel_masked(hit, array): ''' Checks whether a hit (column/row) is masked or not. 
Array is 2D array with boolean elements corresponding to pixles indicating whether a pixel is disabled or not. ''' - if array.shape[0] > hit["column"] and array.shape[1] > hit["row"]: - return array[hit["column"], hit["row"]] + if hit["column"] >= 0 and hit["row"] >= 0 and array.shape[0] > int(hit["column"]) and array.shape[1] > int(hit["row"]): + return array[int(hit["column"]), int(hit["row"])] else: return False From c6668586a7310b1eaf9f4cbf902dc38b1c0b079a Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 26 Jun 2018 17:32:22 +0200 Subject: [PATCH 04/28] ENH: fuzzy floating point comparison if one of the number is float --- pixel_clusterizer/cluster_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index ceefab5..1b97aba 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py @@ -1,5 +1,5 @@ ''' Fast clustering functions that are compiled in time via numba ''' - +import numpy as np from numba import njit @@ -139,8 +139,8 @@ def _is_in_max_difference(value_1, value_2, max_difference): Circumvents numba bug #1653 ''' if value_1 <= value_2: - return value_2 - value_1 <= max_difference - return value_1 - value_2 <= max_difference + return (np.nextafter(value_2, value_1) - np.nextafter(value_1, value_2)) <= max_difference + return (np.nextafter(value_1, value_2) - np.nextafter(value_2, value_1)) <= max_difference # @njit() From 18aa9886a954ea4de3874be92abbb405608f95cd Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 13:35:34 +0100 Subject: [PATCH 05/28] ENH: only pass valid indices --- pixel_clusterizer/cluster_functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index 1b97aba..e612b08 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py 
@@ -39,8 +39,7 @@ def _finish_cluster(hits, clusters, cluster_size, cluster_hit_indices, cluster_i total_weighted_column = 0 total_weighted_row = 0 - for i in range(cluster_size): - hit_index = cluster_hit_indices[i] + for hit_index in cluster_hit_indices: if hits[hit_index]['charge'] > max_cluster_charge: seed_hit_index = hit_index max_cluster_charge = hits[hit_index]['charge'] @@ -264,7 +263,7 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum hits=hits, clusters=clusters, cluster_size=cluster_size, - cluster_hit_indices=cluster_hit_indices, + cluster_hit_indices=cluster_hit_indices[:cluster_size], cluster_index=start_event_cluster_index + event_cluster_index, cluster_id=event_cluster_index, charge_correction=charge_correction, From 1e0ddfefab75b9fa7d035311fbb15e3a883df803 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 13:35:50 +0100 Subject: [PATCH 06/28] MAINT: comments --- pixel_clusterizer/clusterizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 8bec2dc..bffab58 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -316,8 +316,8 @@ def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): if (n_hits < int(0.5 * self._cluster_hits.size)) or (n_hits > self._cluster_hits.size): self._init_arrays(size=int(1.1 * n_hits)) # oversize buffer slightly to reduce allocations else: - self._assigned_hit_array.fill(0) # The hit indices of the actual cluster, 0 means not assigned - self._cluster_hit_indices.fill(-1) # The hit indices of the actual cluster, -1 means not assigned + self._assigned_hit_array.fill(0) # The array represents the assignment of hits to clusters, 0 means not assigned to any cluster + self._cluster_hit_indices.fill(-1) # The hit indices of the actual cluster, -1 means not assigned to any hit self._clusters.dtype.names = 
self._unmap_cluster_field_names(self._clusters.dtype.names) # Reset the data fields from previous renaming self._cluster_hits.dtype.names = self._unmap_hit_field_names(self._cluster_hits.dtype.names) # Reset the data fields from previous renaming From e0b688d4c4670e14c9eba351cffa933de43600f4 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 13:50:26 +0100 Subject: [PATCH 07/28] MAINT: cleanup --- pixel_clusterizer/cluster_functions.py | 2 +- pixel_clusterizer/clusterizer.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index e612b08..94b74db 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py @@ -221,7 +221,7 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum assigned_hit_array[i] = 1 cluster_size = 1 # actual cluster has one hit so far - for j in cluster_hit_indices: # Loop over all hits of the actual cluster; cluster_hit_indices is updated within the loop if new hit are found + for j in cluster_hit_indices: # Loop over all hits of the actual cluster; cluster_hit_indices is updated within the loop if new hits are found if j < 0: # There are no more cluster hits found break diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index bffab58..70192b2 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -122,8 +122,7 @@ def _init_arrays(self, size=0): self._cluster_hits = np.zeros(shape=(size, ), dtype=np.dtype(self._cluster_hits_descr)) self._clusters = np.zeros(shape=(size, ), dtype=np.dtype(self._cluster_descr)) self._assigned_hit_array = np.zeros(shape=(size, ), dtype=np.bool) - self._cluster_hit_indices = np.empty(shape=(size, ), dtype=np_int_type_chooser(size)) - self._cluster_hit_indices.fill(-1) + self._cluster_hit_indices = np.full(shape=(size, ), fill_value=-1, dtype=np_int_type_chooser(size)) def 
reset(self): # Resets the overwritten function hooks, otherwise they are stored as a module global and not reset on clusterizer initialization self._init_arrays(size=0) From b30d900a96b0613be8eff375715f5c9357b2b6c0 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 14:28:02 +0100 Subject: [PATCH 08/28] ENH: check for increasing event number --- pixel_clusterizer/clusterizer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 70192b2..6d0c9fa 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -126,6 +126,7 @@ def _init_arrays(self, size=0): def reset(self): # Resets the overwritten function hooks, otherwise they are stored as a module global and not reset on clusterizer initialization self._init_arrays(size=0) + self._last_event_number = None def end_of_cluster_function(hits, clusters, cluster_size, cluster_hit_indices, cluster_index, cluster_id, charge_correction, noisy_pixels, disabled_pixels, seed_hit_index): pass @@ -356,6 +357,12 @@ def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): # disabled_pixels = np.recarray(disabled_pixels_array.shape[0], dtype=mask_dtype) # disabled_pixels[:] = [(item[0], item[1]) for item in disabled_pixels_array] + # Check if event number is increasing. Otherwise give a warning message. 
+ if self._last_event_number is not None and self._cluster_hits.shape[0] != 0 and self._cluster_hits[0]["event_number"] == self._last_event_number: + logging.warning('Event number not increasing.') + if self._cluster_hits.shape[0] != 0: + self._last_event_number = self._cluster_hits[-1]["event_number"] + n_clusters = self.cluster_functions._cluster_hits( # Set n_clusters to new size hits=self._cluster_hits[:n_hits], clusters=self._clusters[:n_hits], From 70d44920b4733e50d72d9405f1ae0fe79fdf05c7 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 14:28:30 +0100 Subject: [PATCH 09/28] MAINT: docstring --- pixel_clusterizer/clusterizer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 6d0c9fa..b8d5daa 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -302,9 +302,16 @@ def ignore_same_hits(self, value): def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): ''' Cluster given hit array. - The noisy_pixels and disabled_pixels parameters are iterables of column/row index pairs, e.g. [[column_1, row_1], [column_2, row_2], ...]. - The noisy_pixels parameter allows for removing clusters that consist of a single noisy pixels. Clusters with 2 or more noisy pixels are not removed. - The disabled_pixels parameter allows for ignoring pixels. + Parameters + ---------- + hits : numpy.recarray + Hit data that will be clustered. The hit data contains all information that is required for clustering. + The hit data must contain only complete events. + noisy_pixels : list of 2-tuples + List of 2-tuples of column and row indices of noisy pixels. No cluster will be built when it consists of a single noisy pixel. + Clusters with two or more noisy pixels are built. + disabled_pixels : list of 2-tuples + List of 2-tuples of column and row indices of pixels that will be masked. 
Masked pixels will not be used during clustering. ''' # Jitting a second time to workaround different bahavior of the installation methods on different platforms (pip install vs. python setup.py). # In some circumstances, the Numba compiler can't compile functions that were pickled previously. From 8edbc592e4e87ebe02cd0d839d871198cabbc9ca Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 5 Dec 2018 14:48:49 +0100 Subject: [PATCH 10/28] MAINT: remove deprecated error check --- pixel_clusterizer/cluster_functions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index 94b74db..c5427e8 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py @@ -163,7 +163,6 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum total_hits = hits.shape[0] if total_hits == 0: return 0 # total clusters - max_cluster_hits = cluster_hit_indices.shape[0] if total_hits != clusters.shape[0]: raise ValueError("hits and clusters must be the same size") @@ -246,8 +245,6 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum if _is_in_max_difference(hits[j]['column'], hits[k]['column'], column_cluster_distance) and _is_in_max_difference(hits[j]['row'], hits[k]['row'], row_cluster_distance) and _is_in_max_difference(hits[j]['frame'], hits[k]['frame'], frame_cluster_distance): if not ignore_same_hits or hits[j]['column'] != hits[k]['column'] or hits[j]['row'] != hits[k]['row']: cluster_size += 1 - if cluster_size > max_cluster_hits: - raise IndexError('cluster_hit_indices is too small to contain all cluster hits') cluster_hit_indices[cluster_size - 1] = k assigned_hit_array[k] = 1 From 85acca2e9fafe4ad6f9d2ccbe1529095932b9130 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Wed, 21 Aug 2019 11:08:48 +0200 Subject: [PATCH 11/28] ENH: adding parameter to disable charge weighted clustering (default) --- 
pixel_clusterizer/cluster_functions.py | 47 +++++++++++++------------- pixel_clusterizer/clusterizer.py | 46 +++++++++++++++++++------ 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/pixel_clusterizer/cluster_functions.py b/pixel_clusterizer/cluster_functions.py index c5427e8..90f67c2 100644 --- a/pixel_clusterizer/cluster_functions.py +++ b/pixel_clusterizer/cluster_functions.py @@ -30,24 +30,27 @@ def _pixel_masked(hit, array): @njit() -def _finish_cluster(hits, clusters, cluster_size, cluster_hit_indices, cluster_index, cluster_id, charge_correction, noisy_pixels, disabled_pixels): +def _finish_cluster(hits, clusters, cluster_size, cluster_hit_indices, cluster_index, cluster_id, charge_correction, charge_weighted_clustering, noisy_pixels, disabled_pixels): ''' Set hit and cluster information of the cluster (e.g. number of hits in the cluster (cluster_size), total cluster charge (charge), ...). ''' cluster_charge = 0 - max_cluster_charge = -1 - # necessary for charge weighted hit position - total_weighted_column = 0 - total_weighted_row = 0 + seed_charge = -1 + total_column = 0 + total_row = 0 for hit_index in cluster_hit_indices: - if hits[hit_index]['charge'] > max_cluster_charge: + if hits[hit_index]['charge'] > seed_charge: seed_hit_index = hit_index - max_cluster_charge = hits[hit_index]['charge'] + seed_charge = hits[hit_index]['charge'] hits[hit_index]['is_seed'] = 0 hits[hit_index]['cluster_size'] = cluster_size - # include charge correction in sum - total_weighted_column += hits[hit_index]['column'] * (hits[hit_index]['charge'] + charge_correction) - total_weighted_row += hits[hit_index]['row'] * (hits[hit_index]['charge'] + charge_correction) + if charge_weighted_clustering: + # include charge correction in sum + total_column += hits[hit_index]['column'] * (hits[hit_index]['charge'] + charge_correction) + total_row += hits[hit_index]['row'] * (hits[hit_index]['charge'] + charge_correction) + else: + total_column += 
hits[hit_index]['column'] + total_row += hits[hit_index]['row'] cluster_charge += hits[hit_index]['charge'] hits[hit_index]['cluster_ID'] = cluster_id @@ -58,9 +61,13 @@ def _finish_cluster(hits, clusters, cluster_size, cluster_hit_indices, cluster_i clusters[cluster_index]["charge"] = cluster_charge clusters[cluster_index]['seed_column'] = hits[seed_hit_index]['column'] clusters[cluster_index]['seed_row'] = hits[seed_hit_index]['row'] - # correct total charge value and calculate mean column and row - clusters[cluster_index]['mean_column'] = float(total_weighted_column) / (cluster_charge + cluster_size * charge_correction) - clusters[cluster_index]['mean_row'] = float(total_weighted_row) / (cluster_charge + cluster_size * charge_correction) + if charge_weighted_clustering: + # correct total charge value and calculate mean column and row + clusters[cluster_index]['mean_column'] = float(total_column) / (cluster_charge + cluster_size * charge_correction) + clusters[cluster_index]['mean_row'] = float(total_row) / (cluster_charge + cluster_size * charge_correction) + else: + clusters[cluster_index]['mean_column'] = float(total_column) / cluster_size + clusters[cluster_index]['mean_row'] = float(total_row) / cluster_size # Call end of cluster function hook _end_of_cluster_function( @@ -101,11 +108,11 @@ def _hit_ok(hit, min_hit_charge, max_hit_charge): ''' Check if given hit is withing the limits. 
''' # Omit hits with charge < min_hit_charge - if hit['charge'] < min_hit_charge: + if min_hit_charge is not None and hit['charge'] < min_hit_charge: return False # Omit hits with charge > max_hit_charge - if max_hit_charge != 0 and hit['charge'] > max_hit_charge: + if max_hit_charge is not None and hit['charge'] > max_hit_charge: return False return True @@ -157,7 +164,7 @@ def _is_in_max_difference(value_1, value_2, max_difference): @njit() -def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, column_cluster_distance, row_cluster_distance, frame_cluster_distance, min_hit_charge, max_hit_charge, ignore_same_hits, noisy_pixels, disabled_pixels): +def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, min_hit_charge, max_hit_charge, charge_correction, charge_weighted_clustering, column_cluster_distance, row_cluster_distance, frame_cluster_distance, ignore_same_hits, noisy_pixels, disabled_pixels): ''' Main precompiled function that loopes over the hits and clusters them ''' total_hits = hits.shape[0] @@ -170,13 +177,6 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum if total_hits != assigned_hit_array.shape[0]: raise ValueError("hits and assigned_hit_array must be the same size") - # Correction for charge weighting - # Some chips have non-zero charge for a charge value of zero, charge needs to be corrected to calculate cluster center correctly - if min_hit_charge == 0: - charge_correction = 1 - else: - charge_correction = 0 - # Temporary variables that are reset for each cluster or event start_event_hit_index = 0 start_event_cluster_index = 0 @@ -264,6 +264,7 @@ def _cluster_hits(hits, clusters, assigned_hit_array, cluster_hit_indices, colum cluster_index=start_event_cluster_index + event_cluster_index, cluster_id=event_cluster_index, charge_correction=charge_correction, + charge_weighted_clustering=charge_weighted_clustering, noisy_pixels=noisy_pixels, disabled_pixels=disabled_pixels) 
event_cluster_index += 1 diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index b8d5daa..48bc796 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -29,7 +29,7 @@ class HitClusterizer(object): ''' Clusterizer class providing an interface for the jitted functions and stores settings.''' - def __init__(self, hit_fields=None, hit_dtype=None, cluster_fields=None, cluster_dtype=None, pure_python=False, min_hit_charge=0, max_hit_charge=None, column_cluster_distance=1, row_cluster_distance=1, frame_cluster_distance=0, ignore_same_hits=True): + def __init__(self, hit_fields=None, hit_dtype=None, cluster_fields=None, cluster_dtype=None, pure_python=False, min_hit_charge=None, max_hit_charge=None, charge_correction=None, charge_weighted_clustering=False, column_cluster_distance=1, row_cluster_distance=1, frame_cluster_distance=0, ignore_same_hits=True): # Activate pute python mode by setting the evnironment variable NUMBA_DISABLE_JIT self.pure_python = pure_python if self.pure_python: @@ -106,11 +106,12 @@ def __init__(self, hit_fields=None, hit_dtype=None, cluster_fields=None, cluster # Std. settings self.set_min_hit_charge(min_hit_charge) self.set_max_hit_charge(max_hit_charge) + self.set_charge_correction(charge_correction) + self.set_charge_weighted_clustering(charge_weighted_clustering) self.set_column_cluster_distance(column_cluster_distance) self.set_row_cluster_distance(row_cluster_distance) self.set_frame_cluster_distance(frame_cluster_distance) self.ignore_same_hits(ignore_same_hits) - self.reset() @property @@ -266,19 +267,42 @@ def set_end_of_event_function(self, function): self._end_of_event_function = function def set_min_hit_charge(self, value): - ''' Charge values below this value will effectively ignore the hit. - Value has influence on clustering charge weighting. + ''' Hits with charge values below this value will be ignored. + If None, all hits will be used. 
''' self._min_hit_charge = value def set_max_hit_charge(self, value): - ''' Charge values above this value will effectively ignore the hit. - Value of None or 0 will deactivate this feature. + ''' Hits with charge values above this value will be ignored. + If None, all hits will be used. ''' - if value is None: - value = 0 self._max_hit_charge = value + def set_charge_correction(self, value): + ''' Adding the given value to the hit charge. + If 0 or None, no offset will be added to the hit charge. + Note: + 1. The charge correction is only used when charge_weighted_clustering + is True. + 2. Charge digitizers of some front-end chips start with a value of 0. + If the provided data contains hits with charge of 0, charge_correction must + be set to 1 in this case. Otherwise hits with a charge value of 0 + will not contribute to the charge weighted clustering. + ''' + if value is None: + self._charge_correction = 0 + else: + self._charge_correction = value + + def set_charge_weighted_clustering(self, value): + ''' If True, the charge value of the hits is used + to calculate center of gravity of a cluster. + For correct function, the parameter charge_correction must be set correctly. + If False, only the arithmetic mean of the hit positions is used + to calculate the center of a cluster. + ''' + self._charge_weighted_clustering = bool(value) + def set_column_cluster_distance(self, value): ''' Setting up max. column cluster distance. 
''' @@ -375,11 +399,13 @@ def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): clusters=self._clusters[:n_hits], assigned_hit_array=self._assigned_hit_array[:n_hits], cluster_hit_indices=self._cluster_hit_indices[:n_hits], + min_hit_charge=self._min_hit_charge, + max_hit_charge=self._max_hit_charge, + charge_correction=self._charge_correction, + charge_weighted_clustering=self._charge_weighted_clustering, column_cluster_distance=self._column_cluster_distance, row_cluster_distance=self._row_cluster_distance, frame_cluster_distance=self._frame_cluster_distance, - min_hit_charge=self._min_hit_charge, - max_hit_charge=self._max_hit_charge, ignore_same_hits=self._ignore_same_hits, noisy_pixels=noisy_pixels, disabled_pixels=disabled_pixels) From 5fa3f96e9975e2b912e3c35ede50be81e4e2d853 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Thu, 19 Sep 2019 13:32:01 +0200 Subject: [PATCH 12/28] MAINT: test Python 3.7, cleanup --- .travis.yml | 16 +++++++++------- appveyor.yml | 18 ++++++++---------- requirements.txt | 5 ++--- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index a5473a8..f44bff8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,29 @@ language: python python: -- 2.7 -- 3.5 + - 2.7 + - 3.7 sudo: false notifications: email: - pohl@physik.uni-bonn.de + - janssen@physik.uni-bonn.de install: - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; + wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; else wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi - - chmod +x miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda + - bash miniconda.sh -b -p "$HOME/miniconda" - export PATH="$HOME/miniconda/bin:$PATH" - - conda install --yes numpy numba future nose docutils + - conda update --yes conda - conda info -a + - conda install --yes 
numpy numba nose - pip install coverage coveralls - - python setup.py develop + - pip install -e . + - conda list script: - nosetests # Run nosetests with jitted functions diff --git a/appveyor.yml b/appveyor.yml index c84a060..70051ea 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,23 +8,21 @@ environment: - PYTHON_VERSION: 2.7 MINICONDA: C:\Miniconda-x64 PYTHON_ARCH: "64" - - PYTHON_VERSION: 3.5 - MINICONDA: C:\Miniconda35 + - PYTHON_VERSION: 3.7 + MINICONDA: C:\Miniconda37 PYTHON_ARCH: "32" - - PYTHON_VERSION: 3.5 - MINICONDA: C:\Miniconda35-x64 + - PYTHON_VERSION: 3.7 + MINICONDA: C:\Miniconda37-x64 PYTHON_ARCH: "64" -init: - - "ECHO %PYTHON_VERSION% %MINICONDA%" - install: # Miniconda Python setup + external packages installation - - set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH% # Miniconda is already installed on appveyor: https://github.com/appveyor/ci/issues/359 - - conda install --yes numpy numba future nose docutils + - set PATH=%MINICONDA%;%MINICONDA%\Scripts;%MINICONDA%\Library\bin;%PATH% + - conda update --yes conda - conda info -a + - conda install --yes numpy numba nose + - pip install -e . 
- conda list - - python setup.py develop # Install pixel_clusterizer test_script: - nosetests diff --git a/requirements.txt b/requirements.txt index 0e90eb0..aeeb696 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -future # Python 2/3 compatibility -numpy # fast c array data structures -numba>=0.24.0 # JIT for numpy +numpy +numba>=0.24.0 From 38a6ba16f3db1cc8a38a851dde8a32cf7192586c Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Thu, 19 Sep 2019 13:58:14 +0200 Subject: [PATCH 13/28] MAINT: fixing tests, use proper parameters --- pixel_clusterizer/testing/test_clusterizer.py | 8 ++-- pixel_clusterizer/testing/test_data_types.py | 13 ++++--- pixel_clusterizer/testing/test_hit_masking.py | 37 +++++++++---------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/pixel_clusterizer/testing/test_clusterizer.py b/pixel_clusterizer/testing/test_clusterizer.py index 876a6ad..e852c96 100644 --- a/pixel_clusterizer/testing/test_clusterizer.py +++ b/pixel_clusterizer/testing/test_clusterizer.py @@ -68,7 +68,7 @@ def test_exceptions(self): def test_cluster_algorithm(self): # Check with multiple jumps data # Inititalize Clusterizer - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) hits = create_hits(n_hits=10, max_column=100, max_row=100, max_frame=1, max_charge=2) @@ -125,7 +125,7 @@ def test_cluster_cuts(self): hits[1]['column'], hits[1]['row'], hits[1]['charge'], hits[1]['event_number'] = 18, 36, 6, 19 # Create clusterizer object - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, 
column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) # Case 1: Test max hit charge cut, accept all hits clusterizer.set_max_hit_charge(30) # only add hits with charge <= 30 @@ -285,7 +285,7 @@ def test_cluster_cuts(self): def test_set_end_of_cluster_function(self): # Initialize clusterizer object - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) hits = create_hits(n_hits=10, max_column=100, max_row=100, max_frame=1, max_charge=2) @@ -355,7 +355,7 @@ def end_of_cluster_function(hits, clusters, cluster_size, cluster_hit_indices, c def test_set_end_of_event_function(self): # Initialize clusterizer object - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) hits = create_hits(n_hits=10, max_column=100, max_row=100, max_frame=1, max_charge=2) diff --git a/pixel_clusterizer/testing/test_data_types.py b/pixel_clusterizer/testing/test_data_types.py index 9d7a2c6..ac98f6d 100644 --- 
a/pixel_clusterizer/testing/test_data_types.py +++ b/pixel_clusterizer/testing/test_data_types.py @@ -52,7 +52,7 @@ def test_different_hit_data_types(self): ('parameter_2', 'f4')]) # Initialize clusterizer - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) for hit_data_type in hit_data_types: clusterizer.set_hit_dtype(np.dtype(hit_data_type)) @@ -172,7 +172,10 @@ def test_different_cluster_data_types(self): # Initialize clusterizer clusterizer = HitClusterizer(pure_python=self.pure_python, - min_hit_charge=0, max_hit_charge=13, + min_hit_charge=0, + max_hit_charge=13, + charge_correction=1, + charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, @@ -292,7 +295,7 @@ def test_custom_hit_fields(self): } # Initialize clusterizer and cluster test hits with self defined data type names - clusterizer = HitClusterizer(hit_fields=hit_fields, hit_dtype=hit_dtype, pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(hit_fields=hit_fields, hit_dtype=hit_dtype, pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) hits = create_hits(n_hits=10, max_column=100, max_row=100, max_frame=1, max_charge=2, hit_dtype=hit_dtype, hit_fields=hit_fields) cluster_hits, clusters = clusterizer.cluster_hits(hits) array_size_before = 
clusterizer._clusters.shape[0] @@ -399,7 +402,7 @@ def test_custom_cluster_fields(self): } # Initialize clusterizer and cluster test hits with self defined data type names - clusterizer = HitClusterizer(cluster_fields=cluster_fields, cluster_dtype=cluster_dtype, pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(cluster_fields=cluster_fields, cluster_dtype=cluster_dtype, pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) hits = create_hits(n_hits=10, max_column=100, max_row=100, max_frame=1, max_charge=2) cluster_hits, clusters = clusterizer.cluster_hits(hits) array_size_before = clusterizer._clusters.shape[0] @@ -487,7 +490,7 @@ def test_custom_cluster_fields(self): self.assertTrue(np.array_equal(cluster_hits, expected_hit_result)) def test_adding_cluster_field(self): - clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) + clusterizer = HitClusterizer(pure_python=self.pure_python, min_hit_charge=0, max_hit_charge=13, charge_correction=1, charge_weighted_clustering=True, column_cluster_distance=2, row_cluster_distance=2, frame_cluster_distance=4, ignore_same_hits=True) with self.assertRaises(TypeError): clusterizer.add_cluster_field(description=['extra_field', 'f4']) # also test list of 2 items clusterizer.add_cluster_field(description=[('extra_field', 'f4')]) # also test list of 2-tuples diff --git a/pixel_clusterizer/testing/test_hit_masking.py b/pixel_clusterizer/testing/test_hit_masking.py index bb10a74..27795c8 100644 --- a/pixel_clusterizer/testing/test_hit_masking.py +++ 
b/pixel_clusterizer/testing/test_hit_masking.py @@ -33,7 +33,6 @@ class TestClusterizer(unittest.TestCase): def setUpClass(cls): cls.pure_python = os.getenv('PURE_PYTHON', False) - def test_disabled_pixels(self): # Create some fake data hits = np.ones(shape=(7, ), dtype=np.dtype([('event_number', ' Date: Thu, 19 Sep 2019 14:05:51 +0200 Subject: [PATCH 14/28] MAINT: update README, fix appveyor badge --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2ab90f1..bee38c7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# Pixel Clusterizer [![Build Status](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer.svg?branch=master)](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer) [![Build Status](https://ci.appveyor.com/api/projects/status/github/SiLab-Bonn/pixel_clusterizer)](https://ci.appveyor.com/project/SiLab-Bonn/pixel_clusterizer) [![Coverage Status](https://coveralls.io/repos/github/SiLab-Bonn/pixel_clusterizer/badge.svg?branch=master)](https://coveralls.io/github/SiLab-Bonn/pixel_clusterizer?branch=master) +# Pixel Clusterizer [![Build Status](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer.svg?branch=master)](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer) [![Build status](https://ci.appveyor.com/api/projects/status/c8jqu9ow696opevf?svg=true)](https://ci.appveyor.com/project/laborleben/pixel-clusterizer) [![Coverage Status](https://coveralls.io/repos/github/SiLab-Bonn/pixel_clusterizer/badge.svg?branch=master)](https://coveralls.io/github/SiLab-Bonn/pixel_clusterizer?branch=master) + +Pixel_clusterizer is an easy to use pixel hit clusterizer for Python. It clusters hits connected to unique event numbers in space and time. -Pixel_clusterizer is an easy to use pixel hit-clusterizer for Python. It clusters hits on an event basis in space and time. - The hits have to be defined as a numpy recarray. 
The array has to have the following fields: - event_number - frame From ab24a24ec343ae70e773525236276d0b0e29f840 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Mon, 23 Sep 2019 12:00:37 +0200 Subject: [PATCH 15/28] MAINT: adding more tests for charge offset and charge weighted clustering parameters --- README.md | 2 - pixel_clusterizer/testing/test_clusterizer.py | 197 +++++++++++++++++- 2 files changed, 192 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bee38c7..e019458 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,6 @@ A new array with cluster information is also created created and has the followi - mean_column - mean_row - - # Installation The stable code is hosted on PyPI and can be installed by typing: diff --git a/pixel_clusterizer/testing/test_clusterizer.py b/pixel_clusterizer/testing/test_clusterizer.py index e852c96..eac77f8 100644 --- a/pixel_clusterizer/testing/test_clusterizer.py +++ b/pixel_clusterizer/testing/test_clusterizer.py @@ -66,8 +66,195 @@ def test_exceptions(self): clusterizer = HitClusterizer(hit_fields=hit_mapping, hit_dtype=hit_dtype_new, pure_python=self.pure_python) _, _ = clusterizer.cluster_hits(np.array([], dtype=hit_dtype_new)) - def test_cluster_algorithm(self): # Check with multiple jumps data - # Inititalize Clusterizer + def test_cluster_algorithm(self): # Basic functionality checks + # Initialize Clusterizer with default arguments + clusterizer = HitClusterizer(pure_python=self.pure_python) + + hits = create_hits(n_hits=15, max_column=100, max_row=100, max_frame=1, max_charge=2) + # Dioganal + hits[1]["row"] = 2 + hits[2]["column"] = 4 + hits[2]["row"] = 4 + # Same row + hits[4]["row"] = 7 + hits[5]["column"] = 7 + hits[5]["row"] = 7 + # Same column + hits[7]["column"] = 7 + hits[7]["row"] = 14 + hits[8]["column"] = 7 + hits[8]["row"] = 16 + # Test frame + hits[10]["row"] = 20 + hits[10]["frame"] = 1 + # Same location + hits[14]["column"] = 13 + hits[14]["row"] = 25 + + cluster_hits, clusters 
= clusterizer.cluster_hits(hits) # cluster hits + + # Define expected output + expected_cluster_result = np.zeros(shape=(11, ), dtype=np.dtype([ + ('event_number', ' 29 hits + # Case 2: Test max hit charge cut, omit hits with charge > 29 hits['event_number'] = 20 clusterizer.set_max_hit_charge(29) # only add hits with charge <= 30 cluster_hits, clusters = clusterizer.cluster_hits(hits) # cluster hits @@ -261,14 +448,14 @@ def test_cluster_cuts(self): expected_cluster_result['mean_column'] = [18.0] expected_cluster_result['mean_row'] = [37.0] - clusterizer.ignore_same_hits(True) # If a hit occured 2 times in an event it is ignored and gets the cluster index -2 + clusterizer.ignore_same_hits(True) # If a hit occurred 2 times in an event it is ignored and gets the cluster index -2 cluster_hits, clusters = clusterizer.cluster_hits(hits) # Cluster hits # Test results self.assertTrue(np.array_equal(clusters, expected_cluster_result)) self.assertTrue(np.array_equal(cluster_hits, expected_hit_result)) - clusterizer.ignore_same_hits(False) # If a hit occured 2 times in an event it is used as a normal hit + clusterizer.ignore_same_hits(False) # If a hit occurred 2 times in an event it is used as a normal hit cluster_hits, clusters = clusterizer.cluster_hits(hits) # Cluster hits expected_hit_result['cluster_ID'] = [0, 0, 0] From a6df150882092bf5d28ea1a06d207d1acee64bc3 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Mon, 23 Sep 2019 13:54:04 +0200 Subject: [PATCH 16/28] MAINT: update README --- README.md | 87 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index e019458..f0e247f 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,63 @@ # Pixel Clusterizer [![Build Status](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer.svg?branch=master)](https://travis-ci.org/SiLab-Bonn/pixel_clusterizer) [![Build 
status](https://ci.appveyor.com/api/projects/status/c8jqu9ow696opevf?svg=true)](https://ci.appveyor.com/project/laborleben/pixel-clusterizer) [![Coverage Status](https://coveralls.io/repos/github/SiLab-Bonn/pixel_clusterizer/badge.svg?branch=master)](https://coveralls.io/github/SiLab-Bonn/pixel_clusterizer?branch=master) +## Intended Use + Pixel_clusterizer is an easy to use pixel hit clusterizer for Python. It clusters hits connected to unique event numbers in space and time. -The hits have to be defined as a numpy recarray. The array has to have the following fields: -- event_number -- frame -- column -- row -- charge - -or a mapping of the names has to be provided. The data type does not matter. - -The result of the clustering is the hit array extended by the following fields: -- cluster_ID -- is_seed -- cluster_size -- n_cluster - -A new array with cluster information is also created created and has the following fields: -- event_number -- ID -- size -- charge -- seed_column -- seed_row -- mean_column -- mean_row - -# Installation +The hits must be provided in a numpy recarray. The array must contain the following columns ("fields"): +- ```event_number``` +- ```frame``` +- ```column``` +- ```row``` +- ```charge``` + +If the column names are different, a mapping of the names to the default names can be provided. The data type of each column can vary and is not fixed. The ```column```/```row``` values can be either indices (integer) or positions (float). ```Charge``` can be either integer or float. + +After clustering, two new arrays are returned: +1. The cluster hits array is the hits array extended by the following columns: + - ```cluster_ID``` + - ```is_seed``` + - ```cluster_size``` + - ```n_cluster``` +2. The cluster array contains in each row the information about a single cluster. 
It has the following columns: + - ```event_number``` + - ```ID``` + - ```n_hits``` + - ```charge``` + - ```seed_column``` + - ```seed_row``` + - ```mean_column``` + - ```mean_row``` + +## Installation + +Python 2.7 or Python 3 or higher must be used. There are many ways to install Python, though we recommend using [Anaconda Python](https://www.anaconda.com/distribution/) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html). + +### Prerequisites + +The following packages are required: +``` +numpy numba>=0.24.0 +``` -The stable code is hosted on PyPI and can be installed by typing: +### Installation of pixel_clusterizer +The stable code is hosted on PyPI and can be installed by typing: +``` pip install pixel_clusterizer +``` + +For developers, clone the pixel_clusterizer git repository and use the following command to install pixel_clusterizer: +``` +pip install -e . +``` + +For testing the basic functionality of pixel_clusterizer, execute the following command: +``` +nosetests pixel_clusterizer +``` -# Usage +## Usage ``` import numpy as np @@ -47,9 +71,4 @@ cr = clusterizer.HitClusterizer() # Initialize clusterizer hits_clustered, cluster = cr.cluster_hits(hits) # Cluster hits ``` -Also take a look at the example folder! - -# Test installation -``` -nosetests pixel_clusterizer -``` +Also please have a look at the ```examples``` folder! From ecc9584581cca27f18acfb4ee995336adbb1e40a Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 10:12:19 +0200 Subject: [PATCH 17/28] ENH: allow safe casting --- pixel_clusterizer/clusterizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 48bc796..7cf9875 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -470,7 +470,7 @@ def _check_struct_compatibility(self, hits): continue if key not in hits.dtype.names: raise TypeError('Required hit field "%s" not found.'
% key) - if self._cluster_hits.dtype[mapped_key] != hits.dtype[key]: + if self._cluster_hits.dtype[mapped_key] != hits.dtype[key] and not np.can_cast(hits.dtype[key], self._cluster_hits.dtype[mapped_key]): raise TypeError('The dtype for hit data field "%s" does not match. Got/expected: %s/%s.' % (key, hits.dtype[key], self._cluster_hits.dtype[mapped_key])) additional_hit_fields = set(hits.dtype.names) - set([key for key, val in self._cluster_hits_descr]) if additional_hit_fields: From 8f3293ae23dfe99338133149f0d673c7a7257ff7 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 10:56:30 +0200 Subject: [PATCH 18/28] MAINT: use default hit dtype --- README.md | 2 +- pixel_clusterizer/clusterizer.py | 11 ++++++----- pixel_clusterizer/testing/test_clusterizer.py | 8 ++------ pixel_clusterizer/testing/test_data_types.py | 8 ++------ pixel_clusterizer/testing/test_hit_masking.py | 8 ++------ 5 files changed, 13 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index f0e247f..c570b27 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ import numpy as np from pixel_clusterizer import clusterizer -hits = np.ones(shape=(3, ), dtype=clusterizer.hit_data_type) # Create some data with std. hit data type +hits = np.ones(shape=(3, ), dtype=clusterizer.default_hit_data_type) # Create some data with std. 
hit data type cr = clusterizer.HitClusterizer() # Initialize clusterizer diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 7cf9875..a9ac55e 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -4,11 +4,12 @@ import numpy as np -hit_data_type = np.dtype([('event_number', ' Date: Tue, 24 Sep 2019 10:59:34 +0200 Subject: [PATCH 19/28] ENH: raise exception for dtype float16 which is not supported by numba --- pixel_clusterizer/clusterizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index a9ac55e..1b6c472 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -471,6 +471,8 @@ def _check_struct_compatibility(self, hits): continue if key not in hits.dtype.names: raise TypeError('Required hit field "%s" not found.' % key) + if not self.pure_python and hits.dtype[key] == np.float16: + raise TypeError('The dtype float16 for hit data filed "%s" is not supported.' % (key,)) if self._cluster_hits.dtype[mapped_key] != hits.dtype[key] and not np.can_cast(hits.dtype[key], self._cluster_hits.dtype[mapped_key]): raise TypeError('The dtype for hit data field "%s" does not match. Got/expected: %s/%s.' 
% (key, hits.dtype[key], self._cluster_hits.dtype[mapped_key])) additional_hit_fields = set(hits.dtype.names) - set([key for key, val in self._cluster_hits_descr]) From 4c1b0f33e25a5ebb036998fa39dc5fc76cfd6208 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 11:18:51 +0200 Subject: [PATCH 20/28] MAINT: update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c570b27..34620d6 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ hits = np.ones(shape=(3, ), dtype=clusterizer.default_hit_data_type) # Create s cr = clusterizer.HitClusterizer() # Initialize clusterizer -hits_clustered, cluster = cr.cluster_hits(hits) # Cluster hits +cluster_hits, clusters = cr.cluster_hits(hits) # Cluster hits ``` Also please have a look at the ```examples``` folder! From 9e9182b87c87cba1c2619070af2d7df423424d4c Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 11:21:39 +0200 Subject: [PATCH 21/28] MAINT: code cleanup --- pixel_clusterizer/testing/test_clusterizer.py | 25 ++-------- pixel_clusterizer/testing/test_hit_masking.py | 48 ++++--------------- 2 files changed, 12 insertions(+), 61 deletions(-) diff --git a/pixel_clusterizer/testing/test_clusterizer.py b/pixel_clusterizer/testing/test_clusterizer.py index 95167a7..9be0627 100644 --- a/pixel_clusterizer/testing/test_clusterizer.py +++ b/pixel_clusterizer/testing/test_clusterizer.py @@ -137,12 +137,7 @@ def test_cluster_algorithm(self): # Basic functionality checks clusterizer = HitClusterizer(pure_python=self.pure_python, charge_weighted_clustering=True) # Create some fake data - hits = np.ones(shape=(4, ), dtype=np.dtype([ - ('event_number', ' Date: Tue, 24 Sep 2019 11:45:16 +0200 Subject: [PATCH 22/28] ENH: change default data type (frame -> u2, charge -> f4) --- README.md | 4 +- pixel_clusterizer/clusterizer.py | 81 ++++++++------- pixel_clusterizer/testing/test_clusterizer.py | 74 +++++++------- 
pixel_clusterizer/testing/test_data_types.py | 98 ++++++++++++++----- pixel_clusterizer/testing/test_hit_masking.py | 84 ++++++++-------- 5 files changed, 196 insertions(+), 145 deletions(-) diff --git a/README.md b/README.md index 34620d6..f93c530 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The hits must be provided in a numpy recarray. The array must contain the follow - ```row``` - ```charge``` -If the column names are different, a mapping of the names to the default names can be provided. The data type of each column can vary and is not fixed. The ```column```/```row``` values can be either indices (integer) or positions (float). ```Charge``` can be either integer or float. +If the column names are different, a mapping of the names to the default names can be provided. The data type of each column can vary and is not fixed. The ```column```/```row``` values can be either indices (integer, default) or positions (float). ```Charge``` can be either integer or float (default). After clustering, two new arrays are returned: 1. The cluster hits array is the hits array extended by the following columns: @@ -64,7 +64,7 @@ import numpy as np from pixel_clusterizer import clusterizer -hits = np.ones(shape=(3, ), dtype=clusterizer.default_hit_data_type) # Create some data with std. hit data type +hits = np.ones(shape=(3, ), dtype=clusterizer.default_hit_dtype) # Create some data with std. 
hit data type cr = clusterizer.HitClusterizer() # Initialize clusterizer diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 1b6c472..68ead0d 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -1,15 +1,42 @@ import logging import os +from operator import itemgetter import numpy as np +default_hit_descr = [ + ('event_number', ' Date: Tue, 24 Sep 2019 12:30:47 +0200 Subject: [PATCH 23/28] MAINT: code cleanup --- README.md | 2 +- pixel_clusterizer/__init__.py | 4 + pixel_clusterizer/clusterizer.py | 4 +- pixel_clusterizer/testing/test_clusterizer.py | 609 +++++------- pixel_clusterizer/testing/test_data_types.py | 744 ++++++-------- pixel_clusterizer/testing/test_hit_masking.py | 940 +++++++----------- 6 files changed, 919 insertions(+), 1384 deletions(-) diff --git a/README.md b/README.md index f93c530..a458e36 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ import numpy as np from pixel_clusterizer import clusterizer -hits = np.ones(shape=(3, ), dtype=clusterizer.default_hit_dtype) # Create some data with std. hit data type +hits = np.ones(shape=(3, ), dtype=clusterizer.default_hits_dtype) # Create some data with std. 
hit data type cr = clusterizer.HitClusterizer() # Initialize clusterizer diff --git a/pixel_clusterizer/__init__.py b/pixel_clusterizer/__init__.py index db3fcc7..b9d6a15 100644 --- a/pixel_clusterizer/__init__.py +++ b/pixel_clusterizer/__init__.py @@ -1,3 +1,7 @@ # http://stackoverflow.com/questions/17583443/what-is-the-correct-way-to-share-package-version-with-setup-py-and-the-package from pkg_resources import get_distribution +from pixel_clusterizer.clusterizer import HitClusterizer, default_hits_descr, default_hits_dtype, default_cluster_hits_descr, default_cluster_hits_dtype, default_clusters_descr, default_clusters_dtype + + __version__ = get_distribution('pixel_clusterizer').version +_all_ = ["HitClusterizer", "default_hits_dtype", "default_cluster_hits_descr", "default_cluster_hits_dtype", "default_clusters_descr", "default_clusters_dtype"] diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 68ead0d..b435581 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -4,14 +4,14 @@ import numpy as np -default_hit_descr = [ +default_hits_descr = [ ('event_number', ' 29 hits['event_number'] = 20 clusterizer.set_max_hit_charge(29) # only add hits with charge <= 30 cluster_hits, clusters = clusterizer.cluster_hits(hits) # cluster hits # Check cluster - expected_cluster_result = np.zeros(shape=(1, ), dtype=np.dtype([('event_number', ' Date: Tue, 24 Sep 2019 13:42:45 +0200 Subject: [PATCH 24/28] ENH: additional sanity checks for the event_number column --- pixel_clusterizer/clusterizer.py | 16 +++++++++------- pixel_clusterizer/testing/test_clusterizer.py | 6 ++++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index b435581..697b9d2 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -361,6 +361,14 @@ def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): if 
cluster_hits_field_name in self._cluster_hits.dtype.fields: self._cluster_hits[cluster_hits_field_name][:n_hits] = hits[field_name] + # Check if event number is increasing. Otherwise give a warning message. + if self._last_event_number is not None and hits.shape[0] != 0 and self._cluster_hits["event_number"][0] <= self._last_event_number: + logging.warning('The event number does not increase with successive chunks.') + if hits.shape[0] > 1 and not np.all((self._cluster_hits["event_number"][1:n_hits] - self._cluster_hits["event_number"][:n_hits - 1]) >= 0): + raise RuntimeError('Some values in column "%s" decrease.' % (self._hit_fields_mapping["event_number"],)) + if self._cluster_hits.shape[0] != 0: + self._last_event_number = self._cluster_hits[-1]["event_number"] + noisy_pixels_array = np.array([]) if noisy_pixels is None else np.array(noisy_pixels) if noisy_pixels_array.shape[0] != 0: noisy_pixels_max_range = np.array([max(0, np.max(noisy_pixels_array[:, 0])), max(0, np.max(noisy_pixels_array[:, 1]))]) @@ -386,12 +394,6 @@ def cluster_hits(self, hits, noisy_pixels=None, disabled_pixels=None): # disabled_pixels = np.recarray(disabled_pixels_array.shape[0], dtype=mask_dtype) # disabled_pixels[:] = [(item[0], item[1]) for item in disabled_pixels_array] - # Check if event number is increasing. Otherwise give a warning message. - if self._last_event_number is not None and self._cluster_hits.shape[0] != 0 and self._cluster_hits[0]["event_number"] == self._last_event_number: - logging.warning('Event number not increasing.') - if self._cluster_hits.shape[0] != 0: - self._last_event_number = self._cluster_hits[-1]["event_number"] - n_clusters = self.cluster_functions._cluster_hits( # Set n_clusters to new size hits=self._cluster_hits[:n_hits], clusters=self._clusters[:n_hits], @@ -474,4 +476,4 @@ def _check_struct_compatibility(self, hits): raise TypeError('The dtype for hit data field "%s" does not match. Got/expected: %s/%s.' 
% (key, hits.dtype[key], self._cluster_hits.dtype[mapped_key])) additional_hit_fields = set(hits.dtype.names) - set([key for key, val in self._cluster_hits_descr]) if additional_hit_fields: - logging.warning('Found additional hit fields: %s' % ", ".join(additional_hit_fields)) + logging.warning('Found additional column: %s' % ", ".join(additional_hit_fields)) diff --git a/pixel_clusterizer/testing/test_clusterizer.py b/pixel_clusterizer/testing/test_clusterizer.py index 68638b3..94b2c09 100644 --- a/pixel_clusterizer/testing/test_clusterizer.py +++ b/pixel_clusterizer/testing/test_clusterizer.py @@ -63,6 +63,12 @@ def test_exceptions(self): 'frame': 'frame'} clusterizer = HitClusterizer(hit_fields=hit_mapping, hit_dtype=hit_dtype_new, pure_python=self.pure_python) _, _ = clusterizer.cluster_hits(np.array([], dtype=hit_dtype_new)) + # TEST 4 Set custom and correct hit mapping, decrease event_number + hits = np.ones(shape=(2, ), dtype=hit_dtype_new) + hits[0]['column'], hits[0]['row'], hits[0]['charge'], hits[0]['not_defined'] = 17, 36, 30, 19 + hits[1]['column'], hits[1]['row'], hits[1]['charge'], hits[1]['not_defined'] = 18, 36, 6, 18 + with self.assertRaises(RuntimeError): + _, _ = clusterizer.cluster_hits(hits) def test_cluster_algorithm(self): # Basic functionality checks # Initialize Clusterizer with default arguments From 0b67b3de5d6a4219d3ca32bf056b39e9d9898190 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 14:38:09 +0200 Subject: [PATCH 25/28] ENH: adding function to add hit field --- pixel_clusterizer/clusterizer.py | 29 +++++++++---- pixel_clusterizer/testing/test_data_types.py | 44 +++++++++++++++++++- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/pixel_clusterizer/clusterizer.py b/pixel_clusterizer/clusterizer.py index 697b9d2..d1a3e8c 100644 --- a/pixel_clusterizer/clusterizer.py +++ b/pixel_clusterizer/clusterizer.py @@ -119,7 +119,7 @@ def initialized(self): def _init_arrays(self, size=0): if self.initialized: 
self._cluster_hits = np.zeros(shape=(size, ), dtype=np.dtype(self._cluster_hits_descr)) - self._clusters = np.zeros(shape=(size, ), dtype=np.dtype(self._cluster_descr)) + self._clusters = np.zeros(shape=(size, ), dtype=np.dtype(self._clusters_descr)) self._assigned_hit_array = np.zeros(shape=(size, ), dtype=np.bool) self._cluster_hit_indices = np.full(shape=(size, ), fill_value=-1, dtype=np_int_type_chooser(size)) @@ -228,13 +228,26 @@ def set_cluster_dtype(self, cluster_dtype): cluster_dtype = np.dtype([]) else: cluster_dtype = np.dtype(cluster_dtype) - cluster_descr = cluster_dtype.descr + clusters_descr = cluster_dtype.descr for dtype_name, dtype in self._default_clusters_descr: if self._cluster_fields_mapping[dtype_name] not in cluster_dtype.fields: - cluster_descr.append((dtype_name, dtype)) + clusters_descr.append((dtype_name, dtype)) - self._cluster_descr = cluster_descr + self._clusters_descr = clusters_descr + self._init_arrays(size=0) + + def add_hit_field(self, description): + ''' Adds a field or a list of fields to the cluster result array. Has to be defined as a numpy dtype entry, e.g.: ('parameter', ' Date: Tue, 24 Sep 2019 14:41:01 +0200 Subject: [PATCH 26/28] MAINT: update description and set python_requires parameter --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fcfaf05..1f15d8c 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name='pixel_clusterizer', version=version, - description='A fast, generic, and easy to use clusterizer to cluster hits of a pixel matrix in Python. 
The clustering happens with numba on numpy arrays to increase the speed.', + description='A fast, generic, and easy to use clusterizer to cluster hits of a pixel matrix in Python.', url='https://github.com/SiLab-Bonn/pixel_clusterizer', license='GNU LESSER GENERAL PUBLIC LICENSE Version 2.1', long_description='', @@ -25,5 +25,6 @@ include_package_data=True, # accept all data files and directories matched by MANIFEST.in or found in source control package_data={'': ['README.*', 'VERSION'], 'docs': ['*'], 'examples': ['*']}, keywords=['cluster', 'clusterizer', 'pixel'], + python_requires='>=2.7', platforms='any' ) From fb632eeff7fcbe2f29ddd3902a3e01cc96e2c01e Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 14:41:23 +0200 Subject: [PATCH 27/28] PRJ: bump version to 3.1.5 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1f15d8c..19e45ff 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from setuptools import setup, find_packages # This setup relies on setuptools since distutils is insufficient and badly hacked code -version = '3.1.5.dev0' +version = '3.1.5' author = 'David-Leon Pohl, Jens Janssen' author_email = 'pohl@physik.uni-bonn.de, janssen@physik.uni-bonn.de' From a66b7d3b89c5249e224e3e24ea2e978599124da5 Mon Sep 17 00:00:00 2001 From: Jens Janssen Date: Tue, 24 Sep 2019 14:46:34 +0200 Subject: [PATCH 28/28] MAINT: update README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a458e36..b01f02b 100644 --- a/README.md +++ b/README.md @@ -72,3 +72,7 @@ cluster_hits, clusters = cr.cluster_hits(hits) # Cluster hits ``` Also please have a look at the ```examples``` folder! + +## Support + +Please use GitHub's [issue tracker](https://github.com/SiLab-Bonn/pixel_clusterizer/issues) for bug reports/feature requests/questions.