szsort.h
  1  #ifndef __SORTING_STRINGS_TERMINATED_BY_ZERO_H_
  2  #define __SORTING_STRINGS_TERMINATED_BY_ZERO_H_
  3  
  4  
  5  // This header provides a function to sort keys in dictionary order
  6  // by using multikey quicksort. And, the function can find same keys in
  7  // a given keyset.
  8  //
  9  // StringSort::Sort() receives 2 random access iterators (or pointers),
 10  // depth, and 2 functions (GetLetter and HandleSameKeys). Letters with
 11  // depth+ are used for sorting. GetLetter is used to get letters from keys,
 12  // and HandleSameKeys is called when same keys appear.
 13  //
 14  // GetLetter must receive a key (value type of an iterator) and depth,
 15  // and return a letter (unsigned-compatible).
 16  //
 17  // HandleSameKeys must receive 2 iterators (or pointers) and return true or
 18  // false. If this function returns false, sorting will stop without being
 19  // completed. If the existence of same keys is insignificant, please don't
 20  // pass HandleSameKeys so that a default handler is used. The default
 21  // handler does nothing and returns true.
 22  
 23  
 24  // For std::size_t.
 25  #include <cstddef>
 26  
 27  // For std::iter_swap().
 28  #include <algorithm>
 29  
 30  
 31  namespace StringSort {
 32  
 33  using std::size_t;
 34  
 35  // Default handler does nothing.
 36  struct DefaultHandler {
 37    template <typename RandomAccessIterator>
 38    bool operator()(RandomAccessIterator l, RandomAccessIterator r) {
 39      return true;
 40    }
 41  };
 42  
 43  // Compare a pair of keys.
 44  template <typename RandomAccessIterator, typename GetLetter>
 45  inline int CompareKeys(
 46      RandomAccessIterator lhs, RandomAccessIterator rhs,
 47      size_t depth, GetLetter get_letter) {
 48    unsigned l, r;
 49    do {
 50      l = get_letter(*lhs, depth);
 51      r = get_letter(*rhs, depth);
 52      ++depth;
 53    } while (l != '\0' && l == r);
 54    return (l < r) ? -1 : (l > r);
 55  }
 56  
 57  // Choose the median from 3 values.
 58  template <typename LetterType>
 59  inline LetterType GetMedian(LetterType l, LetterType m, LetterType r) {
 60    if (l < m) {
 61      if (m < r) return m;
 62      else if (l < r) return r;
 63      return l;
 64    } else if (l < r) return l;
 65    else if (m < r) return r;
 66    return m;
 67  }
 68  
 69  // Find same keys and call HandleSameKeys for them.
 70  template <typename RandomAccessIterator, typename GetLetter,
 71            typename HandleSameKeys>
 72  bool FindSameKeys(
 73      RandomAccessIterator l, RandomAccessIterator r, size_t depth,
 74      GetLetter get_letter, HandleSameKeys handle_same_keys) {
 75    RandomAccessIterator start = l;
 76    for (++l; l < r; ++l) {
 77      if (CompareKeys(start, l, depth, get_letter)) {
 78        if (l - start > 1 && !handle_same_keys(start, l))
 79          return false;
 80        start = l;
 81      }
 82    }
 83    return (r - start > 1) ? handle_same_keys(start, r) : true;
 84  }
 85  
 86  // Insertion sort for small range.
 87  template <typename RandomAccessIterator, typename GetLetter,
 88            typename HandleSameKeys>
 89  bool InsertionSort(
 90      RandomAccessIterator l, RandomAccessIterator r, size_t depth,
 91      GetLetter get_letter, HandleSameKeys handle_same_keys) {
 92    int equal_flag = 0;
 93    RandomAccessIterator i, j;
 94    for (i = l + 1; i < r; ++i) {
 95      for (j = i; j > l; --j) {
 96        int compare = CompareKeys(j - 1, j, depth, get_letter);
 97        equal_flag |= (compare == 0);
 98        if (compare <= 0)
 99          break;
100        std::iter_swap(j - 1, j);
101      }
102    }
103    if (equal_flag)
104      return FindSameKeys(l, r, depth, get_letter, handle_same_keys);
105    return true;
106  }
107  
108  // Sort an array by multi-key quick sort.
109  template <typename RandomAccessIterator, typename GetLetter,
110            typename HandleSameKeys>
111  bool Sort(RandomAccessIterator l, RandomAccessIterator r, size_t depth,
112                   GetLetter get_letter, HandleSameKeys handle_same_keys) {
113    while (r - l > 10) {
114      unsigned pivot;
115      RandomAccessIterator pl = l, pr = r, pivot_l = l, pivot_r = r;
116      pivot = GetMedian(get_letter(*l, depth),
117                        get_letter(*(l + (r - l) / 2), depth),
118                        get_letter(*(r - 1), depth));
119  
120      // Move keys less than the pivot to left.
121      // Move keys greater than the pivot to right.
122      for (;;) {
123        int diff;
124        while (pl < pr && (diff = get_letter(*pl, depth) - pivot) <= 0) {
125          if (!diff)
126            std::iter_swap(pl, pivot_l), ++pivot_l;
127          ++pl;
128        }
129        while (pl < pr && (diff = get_letter(*--pr, depth) - pivot) >= 0) {
130          if (!diff)
131            std::iter_swap(pr, --pivot_r);
132        }
133        if (pl >= pr)
134          break;
135        std::iter_swap(pl, pr);
136        ++pl;
137      }
138      while (pivot_l > l)
139        std::iter_swap(--pivot_l, --pl);
140      while (pivot_r < r)
141        std::iter_swap(pivot_r, pr), ++pivot_r, ++pr;
142  
143      // To avoid stack over flow.
144      // Use recursive call except for the largest range.
145      if (pl - l > pr - pl || r - pr > pr - pl) {
146        if (pr - pl > 1) {
147          if (pivot != '\0') {
148            if (!Sort(pl, pr, depth + 1, get_letter, handle_same_keys))
149              return false;
150          } else if (!handle_same_keys(pl, pr))
151            return false;
152        }
153        if (pl - l < r - pr) {
154          if (pl - l > 1 && !Sort(l, pl, depth, get_letter, handle_same_keys))
155            return false;
156          l = pr;
157        } else {
158          if (r - pr > 1 && !Sort(pr, r, depth, get_letter, handle_same_keys))
159            return false;
160          r = pl;
161        }
162      } else {
163        if (pl - l > 1 && !Sort(l, pl, depth, get_letter, handle_same_keys))
164          return false;
165        if (r - pr > 1 && !Sort(pr, r, depth, get_letter, handle_same_keys))
166          return false;
167        l = pl, r = pr;
168        if (pr - pl > 1) {
169          if (pivot != '\0')
170            ++depth;
171          else {
172            if (!handle_same_keys(pl, pr))
173              return false;
174            l = r;
175          }
176        }
177      }
178    }
179    if (r - l > 1)
180      return InsertionSort(l, r, depth, get_letter, handle_same_keys);
181    return true;
182  }
183  
184  // Sort an array by multi-key quick sort.
185  template <typename RandomAccessIterator, typename GetLetter>
186  inline void Sort(RandomAccessIterator l, RandomAccessIterator r, size_t depth,
187                   GetLetter get_letter) {
188    Sort(l, r, depth, get_letter, DefaultHandler());
189  }
190  
191  }  // namespace StringSort.
192  
193  
194  #endif  // __SORTING_STRINGS_TERMINATED_BY_ZERO_H_