casacore
Sort.h
Go to the documentation of this file.
1 //# Sort.h: Sort objects on one or more keys
2 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //#
27 //# $Id$
28 
29 #ifndef CASA_SORT_H
30 #define CASA_SORT_H
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/casa/Arrays/ArrayFwd.h>
35 #include <casacore/casa/Containers/Block.h>
36 #include <casacore/casa/Utilities/ValType.h>
37 #include <casacore/casa/Utilities/Compare.h>
38 #include <casacore/casa/Utilities/CountedPtr.h>
39 
40 namespace casacore { //# NAMESPACE CASACORE - BEGIN
41 
42 // <summary> Define a Sort key </summary>
43 // <use visibility=local>
44 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
45 // </reviewed>
46 
47 // <synopsis>
48 // SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
49 // It holds the following information about a sort key:
50 // <ul>
51 // <li> Address of the data array containing the sort key;
52 // <li> A CountedPtr to a comparison object to be used
53 // (of a class derived from the abstract base class BaseCompare).
54 // <li> Increment for the next data point -- this lets you specify a
55 // stride for keys embedded in a struct;
56 // <li> Sort order -- ascending or descending;
57 // </ul>
58 // </synopsis>
59 
60 class SortKey
61 {
62 public:
63  friend class Sort;
64 
65  // Define a sort key in a given data array using the indicated
66  // comparison object, stride and sort order.
67  SortKey (const void* data, const CountedPtr<BaseCompare>&,
68  uInt increment, int order);
69 
70  // Copy constructor (copy semantics).
71  SortKey (const SortKey&);
72 
74 
75  // Assignment (copy semantics).
77 
78  // Try if GenSort can be used for this single key.
79  // If it succeeds, it returns the resulting number of elements.
80  // Otherwise it returns 0.
81  uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
82  uInt64 tryGenSort (Vector<uInt64>& indexVector, uInt64 nrrec, int opt) const;
83 
84  // Get the sort order.
85  int order() const
86  { return order_p; }
87 
88 protected:
89  // sort order; -1 = ascending, 1 = descending
90  int order_p;
91  // address of first data point
92  const void* data_p;
93  // increment for next data point
95  // comparison object; use CountedPtr for memory management
97  // comparison object; use raw pointer for performance
99 };
100 
101 
102 
103 // <summary> Sort on one or more keys, ascending and/or descending </summary>
104 // <use visibility=export>
105 // <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
106 // </reviewed>
107 
108 // <synopsis>
109 // <src>Sort</src> lets you sort data on one or more keys in a mix of
110 // <src>Sort::ascending</src> and <src>Sort::descending</src> order.
111 // Duplicates can be skipped by giving the option
112 // <src>Sort::NoDuplicates</src>. Only in this case the number of output
113 // elements can be different from the number of input elements.
114 // <br>The <src>unique</src> function offers another way of getting
115 // unique values.
116 // <p>
117 // Class <src>Sort</src> does not sort the data themselves, but
118 // returns an index to them. This gives more flexibility and
119 // allows the sort to be stable; but it is slower.
120 // <br>Very fast sorting of the data themselves can be done with the
121 // functions in class <linkto class=GenSort>GenSort</linkto>.
122 // If sorting on a single key with a standard data type is done,
123 // Sort will use GenSortIndirect to speed up the sort.
124 // <br>
125 // Four sort algorithms are provided:
126 // <DL>
127 // <DT> <src>Sort::ParSort</src>
128 // <DD> The parallel merge sort is the fastest if it can use multiple threads.
129 // For a single thread it has O(n*log(n)) behaviour, but is slower
130 // than quicksort.
131 // A drawback is that it needs an extra index array to do the merge.
132 // <DT> <src>Sort::InsSort</src>
133 // <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
134 // arrays. However, it is the fastest method for small arrays
135 // (< 50 elements) and for arrays already (almost) in the right order.
136 // <DT> <src>Sort::QuickSort</src>
137 // <DD> Care has been taken to solve the well-known quicksort problems
138 // like "array already in order" or "many equal elements". The
139 // behaviour is O(n*log(n)) in all the cases tested, even in
140 // degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
141 // <DT> <src>Sort::HeapSort</src>
142 // <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
143 // that of QuickSort, so QuickSort is the default algorithm.
144 // </DL>
145 // The default is to use QuickSort for small arrays or if only a single
146 // thread can be used. Otherwise ParSort is the default.
147 //
148 // All sort algorithms are <em>stable</em>, which means that the original
149 // order is kept when keys are equal.
150 //
151 // The sort is a four step process:
152 // <ol>
153 // <li> Construct the <src>Sort</src> object.
154 // <li> Define the sort keys. The function <src>sortKey</src> must be
155 // called for each sort key (the most significant one first).
156 // The comparison object can be passed in directly, or a
157 // <linkto group="DataType.h#DataType">basic data type</linkto>
158 // can be given. In the latter case the appropriate ObjCompare
159 // comparison object will be created.
160 // <li> Sort the data. The function <src>sort</src> returns an index
161 // array, which is allocated when needed.
162 // <li> Destruct the <src>Sort</src> object (usually done automatically)
163 // and delete the index array.
164 // </ol>
165 // The data can be in a single array of structs, in separate arrays, or
166 // in a mix of those. Of course, all arrays must have the same length.
167 // The data can be passed to the <src>Sort</src> constructor and/or to the
168 // <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
169 // the offset of the data item in the data array must be given to
170 // <src>sortKey</src>.
171 // </synopsis>
172 
173 // <example>
174 // In the first example we sort the data contained in two "parallel"
175 // arrays, <src>idata</src> and <src>ddata</src>, both of length
176 // <src>nrdata</src>.
177 // <srcblock>
178 // Sort sort;
179 // sort.sortKey (idata, TpInt); // define 1st sort key
180 // sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
181 // Vector<uInt> inx;
182 // sort.sort (inx, nrdata);
183 // for (uInt i=0; i<nrdata; i++) { // show sorted data
184 // cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
185 // }
186 // </srcblock>
187 // Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
188 // and <src>inx</src> an array of (sorted) indices.
189 //
190 // In the second example we sort the data stored in an array of structs
191 // on the double (ascending) and the string (descending). We can pass
192 // the data to the <src>Sort</src> constructor, and the offsets of the
193 // struct elements to the <src>sortKey</src> function.
194 // <srcblock>
195 // struct Ts {
196 // String as;
197 // double ad;
198 // }
199 // Vector<uInt> inx;
200 // Sort sort (tsarr, sizeof(Ts));
201 // sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
202 // sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
203 // Sort::Descending);
204 // sort.sort (inx, nrts);
205 // </srcblock>
206 // Note that the first argument in function <src>sortKey</src> gives
207 // the offset of the variable in the struct.
208 //
209 // Alternatively, and probably slightly easier, we could pass the data
210 // to the <src>sortKey</src> function and use an increment:
211 // <srcblock>
212 // struct Ts {
213 // String as;
214 // double ad;
215 // }
216 // Vector<uInt> inx;
217 // Sort sort;
218 // sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
219 // sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
220 // sort.sort (inx, nrts);
221 // </srcblock>
222 //
223 // Finally, we could provide a comparison object for the struct.
224 // <srcblock>
225 // struct Ts {
226 // String as;
227 // double ad;
228 // }
229 // class MyCompare: public BaseCompare {
230 // virtual int comp (const void* val1, const void* val2) const
231 // {
232 // const Ts& t1 = *(Ts*)val1;
233 // const Ts& t2 = *(Ts*)val2;
234 // if (t1.ad < t2.ad) return -1;
235 // if (t1.ad > t2.ad) return 1;
236 // if (t1.as < t2.as) return 1; // string must be descending
237 // if (t1.as > t2.as) return -1;
238 // return 0;
239 // }
240 // };
241 // Vector<uInt> inx;
242 // Sort sort;
243 // sort.sortKey (tsarr, compareTs, sizeof(Ts));
244 // sort.sort (inx, nrts);
245 // </srcblock>
246 
247 class Sort
248 {
249 public:
250  // Enumerate the sort options:
251  enum Option {DefaultSort=0, // ParSort, but QuickSort for small array
252  HeapSort=1, // use Heapsort algorithm
253  InsSort=2, // use insertion sort algorithm
254  QuickSort=4, // use Quicksort algorithm
255  ParSort=8, // use parallel merge sort algorithm
256  NoDuplicates=16}; // skip data with equal sort keys
257 
258  // Enumerate the sort order:
259  enum Order {Ascending=-1,
261 
262  // The default constructor can be used when the data is only passed
263  // in via function <src>sortKey</src>.
264  Sort();
265 
266  // Construct a Sort object for the given data array with elements
267  // of <src>elementSize</src> bytes. This data array will be used
268  // when an offset is given to the <src>sortKey</src> functions.
269  // You can still pass additional data arrays to the
270  // <src>sortKey</src> functions.
271  Sort (const void* data, uInt elementSize);
272 
273  // Copy constructor (copy semantics).
274  Sort (const Sort&);
275 
276  ~Sort();
277 
278  // Assignment (copy semantics).
279  Sort& operator= (const Sort&);
280 
281  // Define a sort key (the most significant key should be defined first).
282  // The key contains:
283  // <ul>
284  // <li> A pointer to the start of the data array. --- When structs are
285  // sorted on an element in the struct, the pointer must point to
286  // that element in the first struct.
287  // <li> A pointer to the comparison object to be used. --- The
288  // comparison object can be specified in two ways:
289  // <ul>
290  // <li> by giving a
291  // <linkto group="DataType.h#DataType">basic data type</linkto>,
292  // in which case the appropriate comparison object will be
293  // created automatically, or
294  // <li> by a CountedPtr of a comparison object.
295  // You may want to use the templated comparison classes
296  // <linkto class=ObjCompare>ObjCompare</linkto>(),
297  // but you are free to use any other class derived from BaseCompare
298  // that implements the <src>comp</src> function.
299  // </ul>
300  // <li> The increment from one data element to the next. --- When
301  // structs are sorted on an element in the struct, the increment
302  // should be the size of the struct. If the comparison object is
303  // automatically created using the data type specified, the default
304  // increment is the size of the data type.
305  // <li> The sort order. --- <src>Ascending</src> (default) or
306  // <src>Descending</src>;
307  // </ul>
308  //
309  // When the data array has been passed to the Sort constructor,
310  // the data pointer and the increment arguments can be replaced by a
311  // single argument: the offset of the key in each element of the array.
312  //
313  // <group>
314  void sortKey (const void* data, DataType, uInt increment = 0,
315  Order = Ascending);
316  void sortKey (const void* data, const CountedPtr<BaseCompare>&,
317  uInt increment, Order = Ascending);
318  void sortKey (uInt offset, DataType, Order = Ascending);
319  void sortKey (uInt offset, const CountedPtr<BaseCompare>&,
320  Order = Ascending);
321  // </group>
322 
323  // Sort the data array of <src>nrrec</src> records.
324  // The result is an array of indices giving the requested order.
325  // It returns the number of resulting records. The indices array
326  // is resized to that number.
327  // <br> By default it'll try if the faster GenSortIndirect can be used
328  // if a sort on a single key is used.
329  uInt sort (Vector<uInt>& indexVector, uInt nrrec,
330  int options = DefaultSort, Bool tryGenSort = True) const;
331  uInt64 sort (Vector<uInt64>& indexVector, uInt64 nrrec,
332  int options = DefaultSort, Bool tryGenSort = True) const;
333 
334  // Get all unique records in a sorted array. The array order is
335  // given in the indexVector (as possibly returned by the sort function).
336  // The default indexVector is 0..nrrec-1.
337  // The index of each first unique record is returned in the uniqueVector.
338  // They are indices in the supplied indexVector, so
339  // <src>data[indexVector(uniqueVector(i))]</src>
340  // is giving the i-th unique record.
341  // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
342  // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
343  // <br>
344  // It returns the number of unique records. The unique array
345  // is resized to that number.
346  // <group>
347  uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
348  uInt unique (Vector<uInt>& uniqueVector,
349  const Vector<uInt>& indexVector) const;
350  uInt64 unique (Vector<uInt64>& uniqueVector, uInt64 nrrec) const;
351  uInt64 unique (Vector<uInt64>& uniqueVector,
352  const Vector<uInt64>& indexVector) const;
353  // </group>
354 
355 private:
356  template<typename T>
357  T doSort (Vector<T>& indexVector, T nrrec,
358  int options = DefaultSort, Bool tryGenSort = True) const;
359 
360  template <typename T>
361  T doUnique (Vector<T>& uniqueVector, T nrrec) const;
362  template <typename T>
363  T doUnique (Vector<T>& uniqueVector, const Vector<T>& indexVector) const;
364 
365  // Copy that Sort object to this.
366  void copy (const Sort& that);
367 
368  // Add a sort key giving a data type and stride or the sort key.
369  // <group>
370  void addKey (const void* data, DataType, uInt increment, int options);
371  void addKey (SortKey*);
372  // </group>
373 
374  // Do an insertion sort, optionally skipping duplicates.
375  // <group>
376  template<typename T>
377  T insSort (T nr, T* indices) const;
378  template<typename T>
379  T insSortNoDup (T nr, T* indices) const;
380  // </group>
381 
382  // Do a merge sort, if possible in parallel using OpenMP.
383  // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
384  // to use. It defaults to the number of cores.
385  template<typename T>
386  T parSort (int nthr, T nrrec, T* inx) const;
387  template<typename T>
388  void merge (T* inx, T* tmp, T size, T* index,
389  T nparts) const;
390 
391  // Do a quicksort, optionally skipping duplicates
392  // (qkSort is the actual quicksort function).
393  // <group>
394  template<typename T>
395  T quickSort (T nr, T* indices) const;
396  template<typename T>
397  T quickSortNoDup (T nr, T* indices) const;
398  template<typename T>
399  void qkSort (T nr, T* indices) const;
400  // </group>
401 
402  // Do a heapsort, optionally skipping duplicates.
403  // <group>
404  template<typename T>
405  T heapSort (T nr, T* indices) const;
406  template<typename T>
407  T heapSortNoDup (T nr, T* indices) const;
408  // </group>
409 
410  // Siftdown algorithm for heapsort.
411  template<typename T>
412  void siftDown (T low, T up, T* indices) const;
413 
414  // Compare the keys of 2 records.
415  template<typename T>
416  int compare (T index1, T index2) const;
417 
418  // Swap 2 indices.
419  template<typename T>
420  inline void swap (T index1, T index2, T* indices) const
421  {
422  T t = indices[index1];
423  indices[index1] = indices[index2];
424  indices[index2] = t;
425  }
426 
427  //# Data memebers
428  PtrBlock<SortKey*> keys_p; //# keys to sort on
429  uInt nrkey_p; //# #sort-keys
430  const void* data_p; //# pointer to data records
431  uInt size_p; //# size of data record
432  int order_p; //# -1=asc 0=mixed 1=desc
433 };
434 
435 
436 } //# NAMESPACE CASACORE - END
437 
438 #endif
abstract base class for comparing two objects
Definition: Compare.h:65
Referenced counted pointer for constant data.
Definition: CountedPtr.h:81
A drop-in replacement for Block<T*>.
Definition: Block.h:814
int order() const
Get the sort order.
Definition: Sort.h:85
uInt tryGenSort(Vector< uInt > &indexVector, uInt nrrec, int opt) const
Try if GenSort can be used for this single key.
BaseCompare * cmpObj_p
comparison object; use raw pointer for performance
Definition: Sort.h:98
SortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, int order)
Define a sort key in a given data array using the indicated comparison object, stride and sort order.
CountedPtr< BaseCompare > ccmpObj_p
comparison object; use CountedPtr for memory management
Definition: Sort.h:96
uInt64 tryGenSort(Vector< uInt64 > &indexVector, uInt64 nrrec, int opt) const
SortKey(const SortKey &)
Copy constructor (copy semantics).
const void * data_p
address of first data point
Definition: Sort.h:92
int order_p
sort order; -1 = ascending, 1 = descending
Definition: Sort.h:90
uInt incr_p
increment for next data point
Definition: Sort.h:94
SortKey & operator=(const SortKey &)
Assignment (copy semantics).
Sort on one or more keys, ascending and/or descending.
Definition: Sort.h:248
uInt64 unique(Vector< uInt64 > &uniqueVector, const Vector< uInt64 > &indexVector) const
Option
Enumerate the sort options:
Definition: Sort.h:251
@ DefaultSort
Definition: Sort.h:251
@ NoDuplicates
Definition: Sort.h:256
T heapSortNoDup(T nr, T *indices) const
const void * data_p
Definition: Sort.h:430
void addKey(const void *data, DataType, uInt increment, int options)
Add a sort key giving a data type and stride or the sort key.
uInt64 sort(Vector< uInt64 > &indexVector, uInt64 nrrec, int options=DefaultSort, Bool tryGenSort=True) const
T parSort(int nthr, T nrrec, T *inx) const
Do a merge sort, if possible in parallel using OpenMP.
void sortKey(uInt offset, const CountedPtr< BaseCompare > &, Order=Ascending)
uInt size_p
Definition: Sort.h:431
void copy(const Sort &that)
Copy that Sort object to this.
uInt unique(Vector< uInt > &uniqueVector, uInt nrrec) const
Get all unique records in a sorted array.
T insSort(T nr, T *indices) const
Do an insertion sort, optionally skipping duplicates.
uInt unique(Vector< uInt > &uniqueVector, const Vector< uInt > &indexVector) const
T doSort(Vector< T > &indexVector, T nrrec, int options=DefaultSort, Bool tryGenSort=True) const
T quickSort(T nr, T *indices) const
Do a quicksort, optionally skipping duplicates (qkSort is the actual quicksort function).
void merge(T *inx, T *tmp, T size, T *index, T nparts) const
int compare(T index1, T index2) const
Compare the keys of 2 records.
T heapSort(T nr, T *indices) const
Do a heapsort, optionally skipping duplicates.
void sortKey(uInt offset, DataType, Order=Ascending)
void sortKey(const void *data, DataType, uInt increment=0, Order=Ascending)
Define a sort key (the most significant key should be defined first).
uInt64 unique(Vector< uInt64 > &uniqueVector, uInt64 nrrec) const
Order
Enumerate the sort order:
Definition: Sort.h:259
@ Descending
Definition: Sort.h:260
T quickSortNoDup(T nr, T *indices) const
Sort & operator=(const Sort &)
Assignment (copy semantics).
void swap(T index1, T index2, T *indices) const
Swap 2 indices.
Definition: Sort.h:420
uInt nrkey_p
Definition: Sort.h:429
T doUnique(Vector< T > &uniqueVector, const Vector< T > &indexVector) const
Sort(const void *data, uInt elementSize)
Construct a Sort object for the given data array with elements of elementSize bytes.
T doUnique(Vector< T > &uniqueVector, T nrrec) const
uInt sort(Vector< uInt > &indexVector, uInt nrrec, int options=DefaultSort, Bool tryGenSort=True) const
Sort the data array of nrrec records.
T insSortNoDup(T nr, T *indices) const
void addKey(SortKey *)
Sort()
The default constructor can be used when the data is only passed in via function sortKey.
PtrBlock< SortKey * > keys_p
Definition: Sort.h:428
Sort(const Sort &)
Copy constructor (copy semantics).
void qkSort(T nr, T *indices) const
void sortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, Order=Ascending)
int order_p
Definition: Sort.h:432
void siftDown(T low, T up, T *indices) const
Siftdown algorithm for heapsort.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43
unsigned long long uInt64
Definition: aipsxtype.h:39