-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathhistcn.m
138 lines (126 loc) · 4.19 KB
/
histcn.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
function [count edges mid loc] = histcn(X, varargin)
% function [count edges mid loc] = histcn(X, edge1, edge2, ..., edgeN)
%
% Purpose: compute n-dimensional histogram
%
% INPUT
% - X: is (M x N) array, represents M data points in R^N
% - edgek: are the bin vectors on dimension k, k=1...N.
% If it is a scalar (Nk), the bins will be the linear subdivision of
% the data on the range [min(X(:,k)), max(X(:,k))] into Nk
% sub-intervals
% If it's empty, a default of 32 subdivions will be used
%
% OUTPUT
% - count: n-dimensional array count of X on the bins, i.e.,
% count(i1,i2,...,iN) = cardinal of X such that
% edge1(i1) <= X(:,i1) < edge1(i1)+1 and
% ...
% edgeN(iN) <= X(:,iN) < edgeN(iN)+1
% - edges: (1 x N) cell, each provides the effective edges used in the
% respective dimension
% - mid: (1 x N) cell, provides the mid points of the cellpatch used in
% the respective dimension
% - loc: (M x N) array, index location of X in the bins. Points have out
% of range coordinates will have zero at the corresponding dimension.
%
% DATA ACCUMULATE SYNTAX:
% [ ... ] = histcn(..., 'AccumData', VAL);
% where VAL is M x 1 array. Each VAL(k) corresponds to position X(k,:)
% will be accumulated in the cell containing X. The accumulate result
% is returned in COUNT.
% NOTE: Calling without 'AccumData' is similar to having VAL = ones(M,1)
%
% [ ... ] = histcn(..., 'AccumData', VAL, 'FUN', FUN);
% applies the function FUN to each subset of elements of VAL. FUN is
% a function that accepts a column vector and returns
% a numeric, logical, or char scalar, or a scalar cell. A has the same class
% as the values returned by FUN. FUN is @SUM by default. Specify FUN as []
% for the default behavior.
%
% Usage examples:
% M = 1e5;
% N = 3;
% X = randn(M,N);
% [N edges mid loc] = histcn(X);
% imagesc(mid{1:2},N(:,:,ceil(end/2)))
%
% % Compute the mean on rectangular patch from scattered data
% DataSize = 1e5;
% Lat = rand(1,DataSize)*180;
% Lon = rand(1,DataSize)*360;
% Data = randn(1,DataSize);
% lat_edge = 0:1:180;
% lon_edge = 0:1:360;
% meanData = histcn([Lat(:) Lon(:)], lat_edge, lon_edge, 'AccumData', Data, 'Fun', @mean);
%
% See also: HIST, ACCUMARRAY
%
% Bruno Luong: <brunoluong@yahoo.com>
% Last update: 25/August/2011
if ndims(X)>2
error('histcn: X requires to be an (M x N) array of M points in R^N');
end
DEFAULT_NBINS = 32;
AccumData = [];
Fun = {};
% Looks for where optional parameters start
% For now only 'AccumData' is valid
split = find(cellfun('isclass', varargin, 'char'), 1, 'first');
if ~isempty(split)
for k = split:2:length(varargin)
if strcmpi(varargin{k},'AccumData')
AccumData = varargin{k+1}(:);
elseif strcmpi(varargin{k},'Fun')
Fun = varargin(k+1); % 1x1 cell
end
end
varargin = varargin(1:split-1);
end
% Get the dimension
nd = size(X,2);
edges = varargin;
if nd<length(edges)
nd = length(edges); % wasting CPU time warranty
else
edges(end+1:nd) = {DEFAULT_NBINS};
end
% Allocation of array loc: index location of X in the bins
loc = zeros(size(X));
sz = zeros(1,nd);
% Loop in the dimension
for d=1:nd
ed = edges{d};
Xd = X(:,d);
if isempty(ed)
ed = DEFAULT_NBINS;
end
if isscalar(ed) % automatic linear subdivision
ed = linspace(min(Xd),max(Xd),ed+1);
end
edges{d} = ed;
% Call histc on this dimension
[dummy loc(:,d)] = histc(Xd, ed, 1);
% Use sz(d) = length(ed); to create consistent number of bins
sz(d) = length(ed)-1;
end % for-loop
% Clean
clear dummy
% This is need for seldome points that hit the right border
sz = max([sz; max(loc,[],1)]);
% Compute the mid points
mid = cellfun(@(e) 0.5*(e(1:end-1)+e(2:end)), edges, ...
'UniformOutput', false);
% Count for points where all coordinates are falling in a corresponding
% bins
if nd==1
sz = [sz 1]; % Matlab doesn't know what is one-dimensional array!
end
hasdata = all(loc>0, 2);
if ~isempty(AccumData)
count = accumarray(loc(hasdata,:), AccumData(hasdata), sz, Fun{:});
else
count = accumarray(loc(hasdata,:), 1, sz);
end
return
end % histcn