GeographicLib  1.48
NearestNeighbor.hpp
Go to the documentation of this file.
1 /**
2  * \file NearestNeighbor.hpp
3  * \brief Header for GeographicLib::NearestNeighbor class
4  *
5  * Copyright (c) Charles Karney (2016) <charles@karney.com> and licensed under
6  * the MIT/X11 License. For more information, see
7  * https://geographiclib.sourceforge.io/
8  **********************************************************************/
9 
10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12 
13 #include <algorithm> // for nth_element, max_element, etc.
14 #include <vector>
15 #include <queue> // for priority_queue
16 #include <utility> // for swap + pair
17 #include <cstring>
18 #include <limits>
19 #include <cmath>
20 #include <iostream>
21 #include <sstream>
22 // Only for GEOGRAPHICLIB_STATIC_ASSERT and GeographicLib::GeographicErr
24 
25 #if !defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION)
26 #define GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION 0
27 #endif
28 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
29 #include <boost/serialization/nvp.hpp>
30 #include <boost/serialization/split_member.hpp>
31 #include <boost/serialization/array.hpp>
32 #include <boost/serialization/vector.hpp>
33 #endif
34 
35 #if defined(_MSC_VER)
36 // Squelch warnings about constant conditional expressions
37 # pragma warning (push)
38 # pragma warning (disable: 4127)
39 #endif
40 
41 namespace GeographicLib {
42 
43  /**
44  * \brief Nearest-neighbor calculations
45  *
46  * This class solves the nearest-neighbor problm using a vantage-point tree
47  * as described in \ref nearest.
48  *
49  * This class is templated so that it can handle arbitrary metric spaces as
50  * follows:
51  *
52  * @tparam dist_t the type used for measuring distances; it can be a real or
53  * signed integer type; in typical geodetic applications, \e dist_t might be
54  * <code>double</code>.
55  * @tparam pos_t the type for specifying the positions of points; geodetic
56  * application might bundled the latitude and longitude into a
57  * <code>std::pair<dist_t, dist_t></code>.
58  * @tparam distfun_t the type of a function object which takes takes two
59  * positions (of type \e pos_t) and returns the distance (of type \e
60  * dist_t); in geodetic applications, this might be a class which is
61  * constructed with a Geodesic object and which implements a member
62  * function with a signature <code>dist_t operator() (const pos_t&, const
63  * pos_t&) const</code>, which returns the geodesic distance between two
64  * points.
65  *
66  * \note The distance measure must satisfy the triangle inequality, \f$
67  * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
68  * geodesic distance (given by Geodesic::Inverse) does, while the great
69  * ellipse distance and the rhumb line distance <i>do not</i>. If you use
70  * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
71  * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
72  * square root in the interests of "efficiency"; the squared distance does
73  * not satisfy the triangle inequality!
74  *
75  * This is a "header-only" implementation and, as such, depends in a minimal
76  * way on the rest of GeographicLib (the only dependency is through the use
77  * of GEOGRAPHICLIB_STATIC_ASSERT and GeographicLib::GeographicErr for
78  * handling run-time and compile-time exceptions). Therefore, it is easy to
79  * extract this class from the rest of GeographicLib and use it as a
80  * stand-alone facility.
81  *
82  * The \e dist_t type must support numeric_limits queries (specifically:
83  * is_signed, is_integer, max(), digits, and digits10).
84  *
85  * The NearestNeighbor object is constructed with a vector of points (type \e
86  * pos_t) and a distance function (type \e distfun_t). However the object
87  * does \e not store the points. When querying the object with Search(),
88  * it's necessary to supply the same vector of points and the same distance
89  * function.
90  *
91  * There's no capability in this implementation to add or remove points from
92  * the set. Instead Initialize() should be called to re-initialize the
93  * object with the modified vector of points.
94  *
95  * Because of the overhead in constructing a NearestNeighbor object for a
96  * large set of points, functions Save() and Load() are provided to save the
97  * object to an external file. operator<<(), operator>>() and <a
98  * href="http://www.boost.org/libs/serialization/doc"> Boost
99  * serialization</a> can also be used to save and restore a NearestNeighbor
100  * object. This is illustrated in the example.
101  *
102  * Example of use:
103  * \include example-NearestNeighbor.cpp
104  **********************************************************************/
105  template <typename dist_t, typename pos_t, class distfun_t>
107  // For tracking changes to the I/O format
108  static const int version = 1;
109  // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow 4
110  // slots (and this accommodates the default value bucket = 4).
111  static const int maxbucket = (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
112  (4 * sizeof(dist_t)) / sizeof(int) : 2));
113  public:
114 
115  /**
116  * Default constructor for NearestNeighbor.
117  *
118  * This is equivalent to specifying an empty set of points.
119  **********************************************************************/
120  NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
121 
122  /**
123  * Constructor for NearestNeighbor.
124  *
125  * @param[in] pts a vector of points to include in the set.
126  * @param[in] dist the distance function object.
127  * @param[in] bucket the size of the buckets at the leaf nodes; this must
128  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
129  * @exception GeographicErr if the value of \e bucket is out of bounds or
130  * the size of \e pts is too big for an int.
131  * @exception std::bad_alloc if memory for the tree can't be allocated.
132  *
133  * The distances computed by \e dist must satisfy the standard metric
134  * conditions. If not, the results are undefined. Neither the data in \e
135  * pts nor the query points should contain NaNs or infinities because such
136  * data violates the metric conditions.
137  *
138  * \e pts may contain coincident points (i.e., the distance between them
139  * vanishes); these are treated as distinct.
140  *
141  * The choice of \e bucket is a tradeoff between space and efficiency. A
142  * larger \e bucket decreases the size of the NearestNeighbor object which
143  * scales as pts.size() / max(1, bucket) and reduces the number of distance
144  * calculations to construct the object by log2(bucket) * pts.size().
145  * However each search then requires about bucket additional distance
146  * calculations.
147  *
148  * \warning The same arguments \e pts and \e dist must be provided
149  * to the Search() function.
150  **********************************************************************/
151  NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
152  int bucket = 4) {
153  Initialize(pts, dist, bucket);
154  }
155 
156  /**
157  * Initialize or re-initialize NearestNeighbor.
158  *
159  * @param[in] pts a vector of points to include in the tree.
160  * @param[in] dist the distance function object.
161  * @param[in] bucket the size of the buckets at the leaf nodes; this must
162  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
163  * @exception GeographicErr if the value of \e bucket is out of bounds or
164  * the size of \e pts is too big for an int.
165  * @exception std::bad_alloc if memory for the tree can't be allocated.
166  *
167  * See also the documentation on the constructor.
168  *
169  * If an exception is thrown, the state of the NearestNeighbor is
170  * unchanged.
171  **********************************************************************/
172  void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
173  int bucket = 4) {
174  GEOGRAPHICLIB_STATIC_ASSERT(std::numeric_limits<dist_t>::is_signed,
175  "dist_t must be a signed type");
176  if (!( 0 <= bucket && bucket <= maxbucket ))
178  ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
179  if (pts.size() > size_t(std::numeric_limits<int>::max()))
180  throw GeographicLib::GeographicErr("pts array too big");
181  // the pair contains distance+id
182  std::vector<item> ids(pts.size());
183  for (int k = int(ids.size()); k--;)
184  ids[k] = std::make_pair(dist_t(0), k);
185  int cost = 0;
186  std::vector<Node> tree;
187  init(pts, dist, bucket, tree, ids, cost,
188  0, int(ids.size()), int(ids.size()/2));
189  _tree.swap(tree);
190  _numpoints = int(pts.size());
191  _bucket = bucket;
192  _mc = _sc = 0;
193  _cost = cost; _c1 = _k = _cmax = 0;
194  _cmin = std::numeric_limits<int>::max();
195  }
196 
197  /**
198  * Search the NearestNeighbor.
199  *
200  * @param[in] pts the vector of points used for initialization.
201  * @param[in] dist the distance function object used for initialization.
202  * @param[in] query the query point.
203  * @param[out] ind a vector of indices to the closest points found.
204  * @param[in] k the number of points to search for (default = 1).
205  * @param[in] maxdist only return points with distances of \e maxdist or
206  * less from \e query (default is the maximum \e dist_t).
207  * @param[in] mindist only return points with distances of more than
208  * \e mindist from \e query (default = &minus;1).
209  * @param[in] exhaustive whether to do an exhaustive search (default true).
210  * @param[in] tol the tolerance on the results (default 0).
211  * @return the distance to the closest point found (&minus;1 if no points
212  * are found).
213  *
214  * The indices returned in \e ind are sorted by distance from \e query
215  * (closest first).
216  *
217  * The simplest invocation is with just the 4 non-optional arguments. This
218  * returns the closest distance and the index to the closest point in
219  * <i>ind</i><sub>0</sub>. If there are several points equally close, then
220  * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
221  * there's no closest point (because the set of points is empty), then \e
222  * ind is empty and &minus;1 is returned.
223  *
224  * With \e exhaustive = true and \e tol = 0 (their default values), this
225  * finds the indices of \e k closest neighbors to \e query whose distances
226  * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
227  * maxdist have their default values, then these bounds have no effect. If
228  * \e query is one of the points in the tree, then set \e mindist = 0 to
229  * prevent this point (and other coincident points) from being returned.
230  *
231  * If \e exhaustive = false, exit as soon as \e k results satisfying the
232  * distance criteria are found. If less than \e k results are returned
233  * then the search was exhaustive even if \e exhaustive = false.
234  *
235  * If \e tol is positive, do an approximate search; in this case the
236  * results are to be interpreted as follows: if the <i>k</i>'th distance is
237  * \e dk, then all results with distances less than or equal \e dk &minus;
238  * \e tol are correct; all others are suspect &mdash; there may be other
239  * closer results with distances greater or equal to \e dk &minus; \e tol.
240  * If less than \e k results are found, then the search is exact.
241  *
242  * \e mindist should be used to exclude a "small" neighborhood of the query
243  * point (relative to the average spacing of the data). If \e mindist is
244  * large, the efficiency of the search deteriorates.
245  *
246  * \note Only the shortest distance is returned (as as the function value).
247  * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
248  * for \e j > 0) can be found by invoking \e dist again.
249  *
250  * \warning The arguments \e pts and \e dist must be identical to
251  * those used to initialize the NearestNeighbor; if not, the behavior of
252  * this function is undefined (however, if the size of \e pts is wrong,
253  * this function exits with no results returned).
254  **********************************************************************/
255  dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
256  const pos_t& query,
257  std::vector<int>& ind,
258  int k = 1,
259  dist_t maxdist = std::numeric_limits<dist_t>::max(),
260  dist_t mindist = -1,
261  bool exhaustive = true,
262  dist_t tol = 0) const {
263  std::priority_queue<item> results;
264  if (_numpoints > 0 && _numpoints == int(pts.size()) &&
265  k > 0 && maxdist > mindist) {
266  // distance to the kth closest point so far
267  dist_t tau = maxdist;
268  // first is negative of how far query is outside boundary of node
269  // +1 if on boundary or inside
270  // second is node index
271  std::priority_queue<item> todo;
272  todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
273  int c = 0;
274  while (!todo.empty()) {
275  int n = todo.top().second;
276  dist_t d = -todo.top().first;
277  todo.pop();
278  dist_t tau1 = tau - tol;
279  // compare tau and d again since tau may have become smaller.
280  if (!( n >= 0 && tau1 >= d )) continue;
281  const Node& current = _tree[n];
282  dist_t dst = 0; // to suppress warning about uninitialized variable
283  bool exitflag = false, leaf = current.index < 0;
284  for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
285  int index = leaf ? current.leaves[i] : current.index;
286  if (index < 0) break;
287  dst = dist(pts[index], query);
288  ++c;
289 
290  if (dst > mindist && dst <= tau) {
291  if (int(results.size()) == k) results.pop();
292  results.push(std::make_pair(dst, index));
293  if (int(results.size()) == k) {
294  if (exhaustive)
295  tau = results.top().first;
296  else {
297  exitflag = true;
298  break;
299  }
300  if (tau <= tol) {
301  exitflag = true;
302  break;
303  }
304  }
305  }
306  }
307  if (exitflag) break;
308 
309  if (current.index < 0) continue;
310  tau1 = tau - tol;
311  for (int l = 0; l < 2; ++l) {
312  if (current.data.child[l] >= 0 &&
313  dst + current.data.upper[l] >= mindist) {
314  if (dst < current.data.lower[l]) {
315  d = current.data.lower[l] - dst;
316  if (tau1 >= d)
317  todo.push(std::make_pair(-d, current.data.child[l]));
318  } else if (dst > current.data.upper[l]) {
319  d = dst - current.data.upper[l];
320  if (tau1 >= d)
321  todo.push(std::make_pair(-d, current.data.child[l]));
322  } else
323  todo.push(std::make_pair(dist_t(1), current.data.child[l]));
324  }
325  }
326  }
327  ++_k;
328  _c1 += c;
329  double omc = _mc;
330  _mc += (c - omc) / _k;
331  _sc += (c - omc) * (c - _mc);
332  if (c > _cmax) _cmax = c;
333  if (c < _cmin) _cmin = c;
334  }
335 
336  dist_t d = -1;
337  ind.resize(results.size());
338 
339  for (int i = int(ind.size()); i--;) {
340  ind[i] = int(results.top().second);
341  if (i == 0) d = results.top().first;
342  results.pop();
343  }
344  return d;
345 
346  }
347 
348  /**
349  * @return the total number of points in the set.
350  **********************************************************************/
351  int NumPoints() const { return _numpoints; }
352 
353  /**
354  * Write the object to an I/O stream.
355  *
356  * @param[in,out] os the stream to write to.
357  * @param[in] bin if true (the default) save in binary mode.
358  * @exception std::bad_alloc if memory for the string representation of the
359  * object can't be allocated.
360  *
361  * The counters tracking the statistics of searches are not saved; however
362  * the initializtion cost is saved. The format of the binary saves is \e
363  * not portable.
364  *
365  * \note <a href="http://www.boost.org/libs/serialization/doc">
366  * Boost serialization</a> can also be used to save and restore a
367  * NearestNeighbor object. This requires that the
368  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
369  **********************************************************************/
370  void Save(std::ostream& os, bool bin = true) const {
371  int realspec = std::numeric_limits<dist_t>::digits *
372  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
373  if (bin) {
374  char id[] = "NearestNeighbor_";
375  os.write(id, 16);
376  int buf[6];
377  buf[0] = version;
378  buf[1] = realspec;
379  buf[2] = _bucket;
380  buf[3] = _numpoints;
381  buf[4] = int(_tree.size());
382  buf[5] = _cost;
383  os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
384  for (int i = 0; i < int(_tree.size()); ++i) {
385  const Node& node = _tree[i];
386  os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
387  if (node.index >= 0) {
388  os.write(reinterpret_cast<const char *>(node.data.lower),
389  2 * sizeof(dist_t));
390  os.write(reinterpret_cast<const char *>(node.data.upper),
391  2 * sizeof(dist_t));
392  os.write(reinterpret_cast<const char *>(node.data.child),
393  2 * sizeof(int));
394  } else {
395  os.write(reinterpret_cast<const char *>(node.leaves),
396  _bucket * sizeof(int));
397  }
398  }
399  } else {
400  std::stringstream ostring;
401  // Ensure enough precision for type dist_t. With C++11, max_digits10
402  // can be used instead.
403  if (!std::numeric_limits<dist_t>::is_integer)
404  ostring.precision(std::numeric_limits<dist_t>::digits10 + 2);
405  ostring << version << " " << realspec << " " << _bucket << " "
406  << _numpoints << " " << _tree.size() << " " << _cost;
407  for (int i = 0; i < int(_tree.size()); ++i) {
408  const Node& node = _tree[i];
409  ostring << "\n" << node.index;
410  if (node.index >= 0) {
411  for (int l = 0; l < 2; ++l)
412  ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
413  << " " << node.data.child[l];
414  } else {
415  for (int l = 0; l < _bucket; ++l)
416  ostring << " " << node.leaves[l];
417  }
418  }
419  os << ostring.str();
420  }
421  }
422 
423  /**
424  * Read the object from an I/O stream.
425  *
426  * @param[in,out] is the stream to read from
427  * @param[in] bin if true (the default) load in binary mode.
428  * @exception GeographicErr if the state read from \e is is illegal.
429  * @exception std::bad_alloc if memory for the tree can't be allocated.
430  *
431  * The counters tracking the statistics of searches are reset by this
432  * operation. Binary data must have been saved on a machine with the same
433  * architecture. If an exception is thrown, the state of the
434  * NearestNeighbor is unchanged.
435  *
436  * \note <a href="http://www.boost.org/libs/serialization/doc">
437  * Boost serialization</a> can also be used to save and restore a
438  * NearestNeighbor object. This requires that the
439  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
440  *
441  * \warning The same arguments \e pts and \e dist used for
442  * initialization must be provided to the Search() function.
443  **********************************************************************/
444  void Load(std::istream& is, bool bin = true) {
445  int version1, realspec, bucket, numpoints, treesize, cost;
446  if (bin) {
447  char id[17];
448  is.read(id, 16);
449  id[16] = '\0';
450  if (!(std::strcmp(id, "NearestNeighbor_") == 0))
451  throw GeographicLib::GeographicErr("Bad ID");
452  is.read(reinterpret_cast<char *>(&version1), sizeof(int));
453  is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
454  is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
455  is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
456  is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
457  is.read(reinterpret_cast<char *>(&cost), sizeof(int));
458  } else {
459  if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
460  >> cost ))
461  throw GeographicLib::GeographicErr("Bad header");
462  }
463  if (!( version1 == version ))
464  throw GeographicLib::GeographicErr("Incompatible version");
465  if (!( realspec == std::numeric_limits<dist_t>::digits *
466  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
467  throw GeographicLib::GeographicErr("Different dist_t types");
468  if (!( 0 <= bucket && bucket <= maxbucket ))
469  throw GeographicLib::GeographicErr("Bad bucket size");
470  if (!( 0 <= treesize && treesize <= numpoints ))
471  throw GeographicLib::GeographicErr("Bad number of points or tree size");
472  std::vector<Node> tree;
473  tree.reserve(treesize);
474  for (int i = 0; i < treesize; ++i) {
475  Node node;
476  if (bin) {
477  is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
478  if (node.index >= 0) {
479  is.read(reinterpret_cast<char *>(node.data.lower),
480  2 * sizeof(dist_t));
481  is.read(reinterpret_cast<char *>(node.data.upper),
482  2 * sizeof(dist_t));
483  is.read(reinterpret_cast<char *>(node.data.child),
484  2 * sizeof(int));
485  } else {
486  is.read(reinterpret_cast<char *>(node.leaves),
487  bucket * sizeof(int));
488  for (int l = bucket; l < maxbucket; ++l)
489  node.leaves[l] = 0;
490  }
491  } else {
492  if (!( is >> node.index ))
493  throw GeographicLib::GeographicErr("Bad index");
494  if (node.index >= 0) {
495  for (int l = 0; l < 2; ++l) {
496  if (!( is >> node.data.lower[l] >> node.data.upper[l]
497  >> node.data.child[l] ))
498  throw GeographicLib::GeographicErr("Bad node data");
499  }
500  } else {
501  // Must be at least one valid leaf followed by a sequence end
502  // markers (-1).
503  for (int l = 0; l < bucket; ++l) {
504  if (!( is >> node.leaves[l] ))
505  throw GeographicLib::GeographicErr("Bad leaf data");
506  }
507  for (int l = bucket; l < maxbucket; ++l)
508  node.leaves[l] = 0;
509  }
510  }
511  node.Check(numpoints, treesize, bucket);
512  tree.push_back(node);
513  }
514  _tree.swap(tree);
515  _numpoints = numpoints;
516  _bucket = bucket;
517  _mc = _sc = 0;
518  _cost = cost; _c1 = _k = _cmax = 0;
519  _cmin = std::numeric_limits<int>::max();
520  }
521 
522  /**
523  * Write the object to stream \e os as text.
524  *
525  * @param[in,out] os the output stream.
526  * @param[in] t the NearestNeighbor object to be saved.
527  * @exception std::bad_alloc if memory for the string representation of the
528  * object can't be allocated.
529  **********************************************************************/
530  friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
531  { t.Save(os, false); return os; }
532 
533  /**
534  * Read the object from stream \e is as text.
535  *
536  * @param[in,out] is the input stream.
537  * @param[out] t the NearestNeighbor object to be loaded.
538  * @exception GeographicErr if the state read from \e is is illegal.
539  * @exception std::bad_alloc if memory for the tree can't be allocated.
540  **********************************************************************/
541  friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
542  { t.Load(is, false); return is; }
543 
544  /**
545  * Swap with another NearestNeighbor object.
546  *
547  * @param[in,out] t the NearestNeighbor object to swap with.
548  **********************************************************************/
550  std::swap(_numpoints, t._numpoints);
551  std::swap(_bucket, t._bucket);
552  std::swap(_cost, t._cost);
553  _tree.swap(t._tree);
554  std::swap(_mc, t._mc);
555  std::swap(_sc, t._sc);
556  std::swap(_c1, t._c1);
557  std::swap(_k, t._k);
558  std::swap(_cmin, t._cmin);
559  std::swap(_cmax, t._cmax);
560  }
561 
562  /**
563  * The accumulated statistics on the searches so far.
564  *
565  * @param[out] setupcost the cost of initializing the NearestNeighbor.
566  * @param[out] numsearches the number of calls to Search().
567  * @param[out] searchcost the total cost of the calls to Search().
568  * @param[out] mincost the minimum cost of a Search().
569  * @param[out] maxcost the maximum cost of a Search().
570  * @param[out] mean the mean cost of a Search().
571  * @param[out] sd the standard deviation in the cost of a Search().
572  *
573  * Here "cost" measures the number of distance calculations needed. Note
574  * that the accumulation of statistics is \e not thread safe.
575  **********************************************************************/
576  void Statistics(int& setupcost, int& numsearches, int& searchcost,
577  int& mincost, int& maxcost,
578  double& mean, double& sd) const {
579  setupcost = _cost; numsearches = _k; searchcost = _c1;
580  mincost = _cmin; maxcost = _cmax;
581  mean = _mc; sd = std::sqrt(_sc / (_k - 1));
582  }
583 
584  /**
585  * Reset the counters for the accumulated statistics on the searches so
586  * far.
587  **********************************************************************/
588  void ResetStatistics() const {
589  _mc = _sc = 0;
590  _c1 = _k = _cmax = 0;
591  _cmin = std::numeric_limits<int>::max();
592  }
593 
594  private:
595  // Package up a dist_t and an int. We will want to sort on the dist_t so
596  // put it first.
597  typedef std::pair<dist_t, int> item;
598  // \cond SKIP
599  class Node {
600  public:
601  struct bounds {
602  dist_t lower[2], upper[2]; // bounds on inner/outer distances
603  int child[2];
604  };
605  union {
606  bounds data;
607  int leaves[maxbucket];
608  };
609  int index;
610 
611  Node()
612  : index(-1)
613  {
614  for (int i = 0; i < 2; ++i) {
615  data.lower[i] = data.upper[i] = 0;
616  data.child[i] = -1;
617  }
618  }
619 
620  // Sanity check on a Node
621  void Check(int numpoints, int treesize, int bucket) const {
622  if (!( -1 <= index && index < numpoints ))
623  throw GeographicLib::GeographicErr("Bad index");
624  if (index >= 0) {
625  if (!( -1 <= data.child[0] && data.child[0] < treesize &&
626  -1 <= data.child[1] && data.child[1] < treesize ))
627  throw GeographicLib::GeographicErr("Bad child pointers");
628  if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
629  data.upper[0] <= data.lower[1] &&
630  data.lower[1] <= data.upper[1] ))
631  throw GeographicLib::GeographicErr("Bad bounds");
632  } else {
633  // Must be at least one valid leaf followed by a sequence end markers
634  // (-1).
635  bool start = true;
636  for (int l = 0; l < bucket; ++l) {
637  if (!( (start ?
638  ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
639  leaves[l] == -1) ))
640  throw GeographicLib::GeographicErr("Bad leaf data");
641  start = leaves[l] >= 0;
642  }
643  for (int l = bucket; l < maxbucket; ++l) {
644  if (leaves[l] != 0)
645  throw GeographicLib::GeographicErr("Bad leaf data");
646  }
647  }
648  }
649 
650 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
651  friend class boost::serialization::access;
652  template<class Archive> void save(Archive& ar, const unsigned int) const {
653  ar & boost::serialization::make_nvp("index", index);
654  if (index < 0)
655  ar & boost::serialization::make_nvp("leaves", leaves);
656  else
657  ar & boost::serialization::make_nvp("lower", data.lower)
658  & boost::serialization::make_nvp("upper", data.upper)
659  & boost::serialization::make_nvp("child", data.child);
660  }
661  template<class Archive> void load(Archive& ar, const unsigned int) {
662  ar & boost::serialization::make_nvp("index", index);
663  if (index < 0)
664  ar & boost::serialization::make_nvp("leaves", leaves);
665  else
666  ar & boost::serialization::make_nvp("lower", data.lower)
667  & boost::serialization::make_nvp("upper", data.upper)
668  & boost::serialization::make_nvp("child", data.child);
669  }
670  template<class Archive>
671  void serialize(Archive& ar, const unsigned int file_version)
672  { boost::serialization::split_member(ar, *this, file_version); }
673 #endif
674  };
675  // \endcond
676 #if GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
677  friend class boost::serialization::access;
678  template<class Archive> void save(Archive& ar, const unsigned) const {
679  int realspec = std::numeric_limits<dist_t>::digits *
680  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
681  // Need to use version1, otherwise load error in debug mode on Linux:
682  // undefined reference to GeographicLib::NearestNeighbor<...>::version.
683  int version1 = version;
684  ar & boost::serialization::make_nvp("version", version1)
685  & boost::serialization::make_nvp("realspec", realspec)
686  & boost::serialization::make_nvp("bucket", _bucket)
687  & boost::serialization::make_nvp("numpoints", _numpoints)
688  & boost::serialization::make_nvp("cost", _cost)
689  & boost::serialization::make_nvp("tree", _tree);
690  }
691  template<class Archive> void load(Archive& ar, const unsigned) {
692  int version1, realspec, bucket, numpoints, cost;
693  ar & boost::serialization::make_nvp("version", version1);
694  if (version1 != version)
695  throw GeographicLib::GeographicErr("Incompatible version");
696  std::vector<Node> tree;
697  ar & boost::serialization::make_nvp("realspec", realspec);
698  if (!( realspec == std::numeric_limits<dist_t>::digits *
699  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
700  throw GeographicLib::GeographicErr("Different dist_t types");
701  ar & boost::serialization::make_nvp("bucket", bucket);
702  if (!( 0 <= bucket && bucket <= maxbucket ))
703  throw GeographicLib::GeographicErr("Bad bucket size");
704  ar & boost::serialization::make_nvp("numpoints", numpoints)
705  & boost::serialization::make_nvp("cost", cost)
706  & boost::serialization::make_nvp("tree", tree);
707  if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
708  throw GeographicLib::GeographicErr("Bad number of points or tree size");
709  for (int i = 0; i < int(tree.size()); ++i)
710  tree[i].Check(numpoints, int(tree.size()), bucket);
711  _tree.swap(tree);
712  _numpoints = numpoints;
713  _bucket = bucket;
714  _mc = _sc = 0;
715  _cost = cost; _c1 = _k = _cmax = 0;
716  _cmin = std::numeric_limits<int>::max();
717  }
718  template<class Archive>
719  void serialize(Archive& ar, const unsigned int file_version)
720  { boost::serialization::split_member(ar, *this, file_version); }
721 #endif
722 
723  int _numpoints, _bucket, _cost;
724  std::vector<Node> _tree;
725  // Counters to track stastistics on the cost of searches
726  mutable double _mc, _sc;
727  mutable int _c1, _k, _cmin, _cmax;
728 
729  int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
730  std::vector<Node>& tree, std::vector<item>& ids, int& cost,
731  int l, int u, int vp) {
732 
733  if (u == l)
734  return -1;
735  Node node;
736 
737  if (u - l > (bucket == 0 ? 1 : bucket)) {
738 
739  // choose a vantage point and move it to the start
740  int i = vp;
741  std::swap(ids[l], ids[i]);
742 
743  int m = (u + l + 1) / 2;
744 
745  for (int k = l + 1; k < u; ++k) {
746  ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
747  ++cost;
748  }
749  // partition around the median distance
750  std::nth_element(ids.begin() + l + 1, ids.begin() + m, ids.begin() + u);
751  node.index = ids[l].second;
752  if (m > l + 1) { // node.child[0] is possibly empty
753  typename std::vector<item>::iterator
754  t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
755  node.data.lower[0] = t->first;
756  t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
757  node.data.upper[0] = t->first;
758  // Use point with max distance as vantage point; this point act as a
759  // "corner" point and leads to a good partition.
760  node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
761  l + 1, m, int(t - ids.begin()));
762  }
763  typename std::vector<item>::iterator
764  t = std::max_element(ids.begin() + m, ids.begin() + u);
765  node.data.lower[1] = ids[m].first;
766  node.data.upper[1] = t->first;
767  // Use point with max distance as vantage point here too
768  node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
769  m, u, int(t - ids.begin()));
770  } else {
771  if (bucket == 0)
772  node.index = ids[l].second;
773  else {
774  node.index = -1;
775  // Sort the bucket entries so that the tree is independent of the
776  // implementation of nth_element.
777  std::sort(ids.begin() + l, ids.begin() + u);
778  for (int i = l; i < u; ++i)
779  node.leaves[i-l] = ids[i].second;
780  for (int i = u - l; i < bucket; ++i)
781  node.leaves[i] = -1;
782  for (int i = bucket; i < maxbucket; ++i)
783  node.leaves[i] = 0;
784  }
785  }
786 
787  tree.push_back(node);
788  return int(tree.size()) - 1;
789  }
790 
791  };
792 
793 } // namespace GeographicLib
794 
795 namespace std {
796 
797  /**
798  * Swap two GeographicLib::NearestNeighbor objects.
799  *
800  * @tparam dist_t the type used for measuring distances.
801  * @tparam pos_t the type for specifying the positions of points.
802  * @tparam distfun_t the type for a function object which calculates distances
803  * between points.
804  * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
805  * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
806  **********************************************************************/
807  template <typename dist_t, typename pos_t, class distfun_t>
810  a.swap(b);
811  }
812 
813 } // namespace std
814 
815 #if defined(_MSC_VER)
816 # pragma warning (pop)
817 #endif
818 
819 #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
NearestNeighbor(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void Load(std::istream &is, bool bin=true)
dist_t Search(const std::vector< pos_t > &pts, const distfun_t &dist, const pos_t &query, std::vector< int > &ind, int k=1, dist_t maxdist=std::numeric_limits< dist_t >::max(), dist_t mindist=-1, bool exhaustive=true, dist_t tol=0) const
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
friend std::ostream & operator<<(std::ostream &os, const NearestNeighbor &t)
void swap(GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &a, GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &b)
friend std::istream & operator>>(std::istream &is, NearestNeighbor &t)
Exception handling for GeographicLib.
Definition: Constants.hpp:389
Header for GeographicLib::Constants class.
void Initialize(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void swap(NearestNeighbor &t)
Nearest-neighbor calculations.
void Save(std::ostream &os, bool bin=true) const