ORTS

KCutil.cpp

Go to the documentation of this file.
00001 //----------------------------------------------------------------------
00002 //  File:       KCutil.h
00003 //  Programmer: David Mount
00004 //  Last modified:  03/27/02
00005 //  Description:    Utilities for kc-tree splitting rules
00006 //----------------------------------------------------------------------
00007 // Copyright (C) 2004-2005 David M. Mount and University of Maryland
00008 // All Rights Reserved.
00009 // 
00010 // This program is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU General Public License as published by
00012 // the Free Software Foundation; either version 2 of the License, or (at
00013 // your option) any later version.  See the file Copyright.txt in the
00014 // main directory.
00015 // 
00016 // The University of Maryland and the authors make no representations
00017 // about the suitability or fitness of this software for any purpose.
00018 // It is provided "as is" without express or implied warranty.
00019 //----------------------------------------------------------------------
00020 
00021 #include "KCutil.h"         // kc-utility declarations
00022 
00023 //----------------------------------------------------------------------
00024 //  NOTE: Virtually all point indexing is done through an
00025 //  index (i.e. permutation) array pidx.  Consequently,
00026 //  a reference to the d-th coordinate of the i-th point
00027 //  is pa[pidx[i]][d].  The macro PA(i,d) is a shorthand
00028 //  for this.
00029 //----------------------------------------------------------------------
00030                     // standard 2-d indirect indexing
00031 #define PA(i,d)     (pa[pidx[(i)]][(d)])
00032                     // accessing a single point
00033 #define PP(i)       (pa[pidx[(i)]])
00034                     // swap two points
00035 #define PASWAP(a,b) { int tmp = pidx[a];\
00036                     pidx[a] = pidx[b];\
00037                     pidx[b] = tmp; }
00038 
00039 //----------------------------------------------------------------------
00040 //  Constants
00041 //----------------------------------------------------------------------
00042 
00043 const double ERR = 0.001;       // a small value
00044 
00045 //----------------------------------------------------------------------
00046 //  kmEnclRect, kmEnclCube
00047 //  These utilities compute the smallest rectangle and cube enclosing
00048 //  a set of points, respectively.
00049 //----------------------------------------------------------------------
00050 
00051 void kmEnclRect(
00052     KMpointArray    pa,     // point array
00053     KMidxArray      pidx,       // point indices
00054     int         n,      // number of points
00055     int         dim,        // dimension
00056     KMorthRect      &bnds)      // bounding cube (returned)
00057 {
00058     for (int d = 0; d < dim; d++) { // find smallest enclosing rectangle
00059     KMcoord lo_bnd = PA(0,d);   // lower bound on dimension d
00060     KMcoord hi_bnd = PA(0,d);   // upper bound on dimension d
00061         for (int i = 0; i < n; i++) {
00062         if (PA(i,d) < lo_bnd) lo_bnd = PA(i,d);
00063         else if (PA(i,d) > hi_bnd) hi_bnd = PA(i,d);
00064     }
00065     bnds.lo[d] = lo_bnd;
00066     bnds.hi[d] = hi_bnd;
00067     }
00068 }
00069 
00070 //----------------------------------------------------------------------
00071 //  kmSpread - find spread along given dimension
00072 //  kmMinMax - find min and max coordinates along given dimension
00073 //  kmMaxSpread - find dimension of max spread
00074 //----------------------------------------------------------------------
00075 
00076 KMcoord kmSpread(       // compute point spread along dimension
00077     KMpointArray    pa,     // point array
00078     KMidxArray      pidx,       // point indices
00079     int         n,      // number of points
00080     int         d)      // dimension to check
00081 {
00082     KMcoord min = PA(0,d);      // compute max and min coords
00083     KMcoord max = PA(0,d);
00084     for (int i = 1; i < n; i++) {
00085     KMcoord c = PA(i,d);
00086     if (c < min) min = c;
00087     else if (c > max) max = c;
00088     }
00089     return (max - min);         // total spread is difference
00090 }
00091 
00092 void kmMinMax(          // compute min and max coordinates along dim
00093     KMpointArray    pa,     // point array
00094     KMidxArray      pidx,       // point indices
00095     int         n,      // number of points
00096     int         d,      // dimension to check
00097     KMcoord     &min,       // minimum value (returned)
00098     KMcoord     &max)       // maximum value (returned)
00099 {
00100     min = PA(0,d);          // compute max and min coords
00101     max = PA(0,d);
00102     for (int i = 1; i < n; i++) {
00103     KMcoord c = PA(i,d);
00104     if (c < min) min = c;
00105     else if (c > max) max = c;
00106     }
00107 }
00108 
00109 //----------------------------------------------------------------------
00110 //  kmPlaneSplit - split point array about a cutting plane
00111 //  Split the points in an array about a given plane along a
00112 //  given cutting dimension.  On exit, br1 and br2 are set so
00113 //  that:
00114 //  
00115 //      pa[ 0 ..br1-1] <  cv
00116 //      pa[br1..br2-1] == cv
00117 //      pa[br2.. n -1] >  cv
00118 //
00119 //  All indexing is done indirectly through the index array pidx.
00120 //----------------------------------------------------------------------
00121 
00122 void kmPlaneSplit(      // split points by a plane
00123     KMpointArray    pa,     // points to split
00124     KMidxArray      pidx,       // point indices
00125     int         n,      // number of points
00126     int         d,      // dimension along which to split
00127     KMcoord     cv,     // cutting value
00128     int         &br1,       // first break (values < cv)
00129     int         &br2)       // second break (values == cv)
00130 {
00131     int l = 0;
00132     int r = n-1;
00133     for(;;) {               // partition pa[0..n-1] about cv
00134     while (l < n && PA(l,d) < cv) l++;
00135     while (r >= 0 && PA(r,d) >= cv) r--;
00136     if (l > r) break;
00137     PASWAP(l,r);
00138     l++; r--;
00139     }
00140     br1 = l;            // now: pa[0..br1-1] < cv <= pa[br1..n-1]
00141     r = n-1;
00142     for(;;) {               // partition pa[br1..n-1] about cv
00143     while (l < n && PA(l,d) <= cv) l++;
00144     while (r >= br1 && PA(r,d) > cv) r--;
00145     if (l > r) break;
00146     PASWAP(l,r);
00147     l++; r--;
00148     }
00149     br2 = l;            // now: pa[br1..br2-1] == cv < pa[br2..n-1]
00150 }
00151 
00152 //----------------------------------------------------------------------
00153 //  sl_midpt_split - sliding midpoint splitting rule
00154 //
00155 //  This is a modification of midpt_split, which has the nonsensical
00156 //  name "sliding midpoint".  The idea is that we try to use the
00157 //  midpoint rule, by bisecting the longest side.  If there are
00158 //  ties, the dimension with the maximum spread is selected.  If,
00159 //  however, the midpoint split produces a trivial split (no points
00160 //  on one side of the splitting plane) then we slide the splitting
00161 //  (maintaining its orientation) until it produces a nontrivial
00162 //  split.  For example, if the splitting plane is along the x-axis,
00163 //  and all the data points have x-coordinate less than the x-bisector,
00164 //  then the split is taken along the maximum x-coordinate of the
00165 //  data points.
00166 //
00167 //  Intuitively, this rule cannot generate trivial splits, and
00168 //  hence avoids midpt_split's tendency to produce trees with
00169 //  a very large number of nodes.
00170 //
00171 //----------------------------------------------------------------------
00172 
00173 void sl_midpt_split(
00174     KMpointArray    pa,     // point array
00175     KMidxArray      pidx,       // point indices (permuted on return)
00176     const KMorthRect    &bnds,      // bounding rectangle for cell
00177     int         n,      // number of points
00178     int         dim,        // dimension of space
00179     int         &cut_dim,   // cutting dimension (returned)
00180     KMcoord     &cut_val,   // cutting value (returned)
00181     int         &n_lo)      // num of points on low side (returned)
00182 {
00183     int d;
00184 
00185     KMcoord max_length = bnds.hi[0] - bnds.lo[0];
00186     for (d = 1; d < dim; d++) {     // find length of longest box side
00187     KMcoord length = bnds.hi[d] - bnds.lo[d];
00188     if (length  > max_length) {
00189         max_length = length;
00190     }
00191     }
00192     KMcoord max_spread = -1;        // find long side with most spread
00193     for (d = 0; d < dim; d++) {
00194                     // is it among longest?
00195     if ((bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) {
00196                     // compute its spread
00197         KMcoord spr = kmSpread(pa, pidx, n, d);
00198         if (spr > max_spread) { // is it max so far?
00199         max_spread = spr;
00200         cut_dim = d;
00201         }
00202     }
00203     }
00204                     // ideal split at midpoint
00205     KMcoord ideal_cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim])/2;
00206 
00207     KMcoord min, max;
00208     kmMinMax(pa, pidx, n, cut_dim, min, max);   // find min/max coordinates
00209 
00210     if (ideal_cut_val < min)        // slide to min or max as needed
00211     cut_val = min;
00212     else if (ideal_cut_val > max)
00213     cut_val = max;
00214     else
00215     cut_val = ideal_cut_val;
00216 
00217                     // permute points accordingly
00218     int br1, br2;
00219     kmPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2);
00220     //------------------------------------------------------------------
00221     //  On return:  pa[pidx[[0..br1-1]]   < cut_val
00222     //          pa[pidx[[br1..br2-1]] = cut_val
00223     //          pa[pidx[[br2..n-1]]   > cut_val
00224     //
00225     //  We can set n_lo to any value in the range [br1..br2] to satisfy
00226     //  the exit conditions of the procedure.
00227     //
00228     //  if ideal_cut_val < min (implying br2 >= 1),
00229     //      then we select n_lo = 1 (so there is one point on left) and
00230     //  if ideal_cut_val > max (implying br1 <= n-1),
00231     //      then we select n_lo = n-1 (so there is one point on right).
00232     //  Otherwise, we select n_lo as close to n/2 as possible within
00233     //      [br1..br2].
00234     //------------------------------------------------------------------
00235     if (ideal_cut_val < min) n_lo = 1;
00236     else if (ideal_cut_val > max) n_lo = n-1;
00237     else if (br1 > n/2) n_lo = br1;
00238     else if (br2 < n/2) n_lo = br2;
00239     else n_lo = n/2;
00240 }


Generated on Fri May 18 2012 03:02:40 for ORTS by Doxygen1.7.3