|
ORTS
|
00001 //---------------------------------------------------------------------- 00002 // File: KCutil.h 00003 // Programmer: David Mount 00004 // Last modified: 03/27/02 00005 // Description: Utilities for kc-tree splitting rules 00006 //---------------------------------------------------------------------- 00007 // Copyright (C) 2004-2005 David M. Mount and University of Maryland 00008 // All Rights Reserved. 00009 // 00010 // This program is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU General Public License as published by 00012 // the Free Software Foundation; either version 2 of the License, or (at 00013 // your option) any later version. See the file Copyright.txt in the 00014 // main directory. 00015 // 00016 // The University of Maryland and the authors make no representations 00017 // about the suitability or fitness of this software for any purpose. 00018 // It is provided "as is" without express or implied warranty. 00019 //---------------------------------------------------------------------- 00020 00021 #include "KCutil.h" // kc-utility declarations 00022 00023 //---------------------------------------------------------------------- 00024 // NOTE: Virtually all point indexing is done through an 00025 // index (i.e. permutation) array pidx. Consequently, 00026 // a reference to the d-th coordinate of the i-th point 00027 // is pa[pidx[i]][d]. The macro PA(i,d) is a shorthand 00028 // for this. 00029 //---------------------------------------------------------------------- 00030 // standard 2-d indirect indexing 00031 #define PA(i,d) (pa[pidx[(i)]][(d)]) 00032 // accessing a single point 00033 #define PP(i) (pa[pidx[(i)]]) 00034 // swap two points 00035 #define PASWAP(a,b) { int tmp = pidx[a];\ 00036 pidx[a] = pidx[b];\ 00037 pidx[b] = tmp; } 00038 00039 //---------------------------------------------------------------------- 00040 // Constants 00041 //---------------------------------------------------------------------- 00042 00043 const double ERR = 0.001; // a small value 00044 00045 //---------------------------------------------------------------------- 00046 // kmEnclRect, kmEnclCube 00047 // These utilities compute the smallest rectangle and cube enclosing 00048 // a set of points, respectively. 00049 //---------------------------------------------------------------------- 00050 00051 void kmEnclRect( 00052 KMpointArray pa, // point array 00053 KMidxArray pidx, // point indices 00054 int n, // number of points 00055 int dim, // dimension 00056 KMorthRect &bnds) // bounding cube (returned) 00057 { 00058 for (int d = 0; d < dim; d++) { // find smallest enclosing rectangle 00059 KMcoord lo_bnd = PA(0,d); // lower bound on dimension d 00060 KMcoord hi_bnd = PA(0,d); // upper bound on dimension d 00061 for (int i = 0; i < n; i++) { 00062 if (PA(i,d) < lo_bnd) lo_bnd = PA(i,d); 00063 else if (PA(i,d) > hi_bnd) hi_bnd = PA(i,d); 00064 } 00065 bnds.lo[d] = lo_bnd; 00066 bnds.hi[d] = hi_bnd; 00067 } 00068 } 00069 00070 //---------------------------------------------------------------------- 00071 // kmSpread - find spread along given dimension 00072 // kmMinMax - find min and max coordinates along given dimension 00073 // kmMaxSpread - find dimension of max spread 00074 //---------------------------------------------------------------------- 00075 00076 KMcoord kmSpread( // compute point spread along dimension 00077 KMpointArray pa, // point array 00078 KMidxArray pidx, // point indices 00079 int n, // number of points 00080 int d) // dimension to check 00081 { 00082 KMcoord min = PA(0,d); // compute max and min coords 00083 KMcoord max = PA(0,d); 00084 for (int i = 1; i < n; i++) { 00085 KMcoord c = PA(i,d); 00086 if (c < min) min = c; 00087 else if (c > max) max = c; 00088 } 00089 return (max - min); // total spread is difference 00090 } 00091 00092 void kmMinMax( // compute min and max coordinates along dim 00093 KMpointArray pa, // point array 00094 KMidxArray pidx, // point indices 00095 int n, // number of points 00096 int d, // dimension to check 00097 KMcoord &min, // minimum value (returned) 00098 KMcoord &max) // maximum value (returned) 00099 { 00100 min = PA(0,d); // compute max and min coords 00101 max = PA(0,d); 00102 for (int i = 1; i < n; i++) { 00103 KMcoord c = PA(i,d); 00104 if (c < min) min = c; 00105 else if (c > max) max = c; 00106 } 00107 } 00108 00109 //---------------------------------------------------------------------- 00110 // kmPlaneSplit - split point array about a cutting plane 00111 // Split the points in an array about a given plane along a 00112 // given cutting dimension. On exit, br1 and br2 are set so 00113 // that: 00114 // 00115 // pa[ 0 ..br1-1] < cv 00116 // pa[br1..br2-1] == cv 00117 // pa[br2.. n -1] > cv 00118 // 00119 // All indexing is done indirectly through the index array pidx. 00120 //---------------------------------------------------------------------- 00121 00122 void kmPlaneSplit( // split points by a plane 00123 KMpointArray pa, // points to split 00124 KMidxArray pidx, // point indices 00125 int n, // number of points 00126 int d, // dimension along which to split 00127 KMcoord cv, // cutting value 00128 int &br1, // first break (values < cv) 00129 int &br2) // second break (values == cv) 00130 { 00131 int l = 0; 00132 int r = n-1; 00133 for(;;) { // partition pa[0..n-1] about cv 00134 while (l < n && PA(l,d) < cv) l++; 00135 while (r >= 0 && PA(r,d) >= cv) r--; 00136 if (l > r) break; 00137 PASWAP(l,r); 00138 l++; r--; 00139 } 00140 br1 = l; // now: pa[0..br1-1] < cv <= pa[br1..n-1] 00141 r = n-1; 00142 for(;;) { // partition pa[br1..n-1] about cv 00143 while (l < n && PA(l,d) <= cv) l++; 00144 while (r >= br1 && PA(r,d) > cv) r--; 00145 if (l > r) break; 00146 PASWAP(l,r); 00147 l++; r--; 00148 } 00149 br2 = l; // now: pa[br1..br2-1] == cv < pa[br2..n-1] 00150 } 00151 00152 //---------------------------------------------------------------------- 00153 // sl_midpt_split - sliding midpoint splitting rule 00154 // 00155 // This is a modification of midpt_split, which has the nonsensical 00156 // name "sliding midpoint". The idea is that we try to use the 00157 // midpoint rule, by bisecting the longest side. If there are 00158 // ties, the dimension with the maximum spread is selected. If, 00159 // however, the midpoint split produces a trivial split (no points 00160 // on one side of the splitting plane) then we slide the splitting 00161 // (maintaining its orientation) until it produces a nontrivial 00162 // split. For example, if the splitting plane is along the x-axis, 00163 // and all the data points have x-coordinate less than the x-bisector, 00164 // then the split is taken along the maximum x-coordinate of the 00165 // data points. 00166 // 00167 // Intuitively, this rule cannot generate trivial splits, and 00168 // hence avoids midpt_split's tendency to produce trees with 00169 // a very large number of nodes. 00170 // 00171 //---------------------------------------------------------------------- 00172 00173 void sl_midpt_split( 00174 KMpointArray pa, // point array 00175 KMidxArray pidx, // point indices (permuted on return) 00176 const KMorthRect &bnds, // bounding rectangle for cell 00177 int n, // number of points 00178 int dim, // dimension of space 00179 int &cut_dim, // cutting dimension (returned) 00180 KMcoord &cut_val, // cutting value (returned) 00181 int &n_lo) // num of points on low side (returned) 00182 { 00183 int d; 00184 00185 KMcoord max_length = bnds.hi[0] - bnds.lo[0]; 00186 for (d = 1; d < dim; d++) { // find length of longest box side 00187 KMcoord length = bnds.hi[d] - bnds.lo[d]; 00188 if (length > max_length) { 00189 max_length = length; 00190 } 00191 } 00192 KMcoord max_spread = -1; // find long side with most spread 00193 for (d = 0; d < dim; d++) { 00194 // is it among longest? 00195 if ((bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) { 00196 // compute its spread 00197 KMcoord spr = kmSpread(pa, pidx, n, d); 00198 if (spr > max_spread) { // is it max so far? 00199 max_spread = spr; 00200 cut_dim = d; 00201 } 00202 } 00203 } 00204 // ideal split at midpoint 00205 KMcoord ideal_cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim])/2; 00206 00207 KMcoord min, max; 00208 kmMinMax(pa, pidx, n, cut_dim, min, max); // find min/max coordinates 00209 00210 if (ideal_cut_val < min) // slide to min or max as needed 00211 cut_val = min; 00212 else if (ideal_cut_val > max) 00213 cut_val = max; 00214 else 00215 cut_val = ideal_cut_val; 00216 00217 // permute points accordingly 00218 int br1, br2; 00219 kmPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); 00220 //------------------------------------------------------------------ 00221 // On return: pa[pidx[[0..br1-1]] < cut_val 00222 // pa[pidx[[br1..br2-1]] = cut_val 00223 // pa[pidx[[br2..n-1]] > cut_val 00224 // 00225 // We can set n_lo to any value in the range [br1..br2] to satisfy 00226 // the exit conditions of the procedure. 00227 // 00228 // if ideal_cut_val < min (implying br2 >= 1), 00229 // then we select n_lo = 1 (so there is one point on left) and 00230 // if ideal_cut_val > max (implying br1 <= n-1), 00231 // then we select n_lo = n-1 (so there is one point on right). 00232 // Otherwise, we select n_lo as close to n/2 as possible within 00233 // [br1..br2]. 00234 //------------------------------------------------------------------ 00235 if (ideal_cut_val < min) n_lo = 1; 00236 else if (ideal_cut_val > max) n_lo = n-1; 00237 else if (br1 > n/2) n_lo = br1; 00238 else if (br2 < n/2) n_lo = br2; 00239 else n_lo = n/2; 00240 }