szsort_macros.h
  1  #ifndef __SORTING_STRINGS_TERMINATED_BY_ZERO_MACROS_H_
  2  #define __SORTING_STRINGS_TERMINATED_BY_ZERO_MACROS_H_
  3  
  4  
  5  // For size_t, malloc and free.
  6  #include <stdio.h>
  7  #include <stdlib.h>
  8  
  9  
 10  // Define static functions to sort an array by multi-key quick sort.
 11  // Depth of recursive calls is limited to O(log_n).
 12  //
 13  // Arguments are as follows:
 14  // - StringSort: Function name, also used for the prefix of internal functions.
 15  // - Type: Type of keys.
 16  // - GetLetter: Function to get a letter from a key.
 17  // - HandleSameKeys: Function to handle same keys found in sorting.
 18  //
 19  // Example: DEFINE_SZSORT(string_sort, key_t, get_letter, handle_same_keys);
 20  //
 21  // This macro defines a function string_sort() to sort an array of key_t.
 22  //  "static void string_sort(key_t *, key_t *, size_t)"
 23  // Two user-defined functions get_letter() and handle_same_keys() are used in
 24  // sorting, and those functions must be defined as follows:
 25  //  "static unsigned char get_letter(const key_t *, size_t depth)"
 26  //  "static void handle_same_keys(key_t *, key_t *)"
 27  #define DEFINE_SZSORT(StringSort, Type, GetLetter, HandleSameKeys)\
 28  \
 29  /* To avoid problems about const. */\
 30  typedef Type StringSort ## Type;\
 31  typedef const StringSort ## Type *StringSort ## Pointer;\
 32  \
 33  /* These declarations are to check the function types. */\
 34  static unsigned char GetLetter(StringSort ## Pointer key, size_t depth);\
 35  static void HandleSameKeys(Type *l, Type *r);\
 36  \
 37  /* Function to choose the median from 3 values. */\
 38  static unsigned char StringSort ## Median(unsigned char l, unsigned char m,\
 39                                            unsigned char r) {\
 40    if (l < m) {\
 41      if (m < r) return m;\
 42      else if (l < r) return r;\
 43      return l;\
 44    } else if (l < r) return l;\
 45    else if (m < r) return r;\
 46    return m;\
 47  }\
 48  \
 49  /* Swap a pair of keys. */\
 50  static void StringSort ## SwapKeys(Type *lhs, Type *rhs) {\
 51    Type tmp = *lhs;\
 52    *lhs = *rhs;\
 53    *rhs = tmp;\
 54  }\
 55  \
 56  /* Compare a pair of keys. */\
 57  static int StringSort ## CompareKeys(\
 58      StringSort ## Pointer lhs, StringSort ## Pointer rhs, size_t depth) {\
 59    unsigned char l, r;\
 60    do {\
 61      l = GetLetter(lhs, depth);\
 62      r = GetLetter(rhs, depth++);\
 63    } while (l != '\0' && l == r);\
 64    return l - r;\
 65  }\
 66  \
 67  /* Find same keys and call HandleSameKeys() for them. */\
 68  static void StringSort ## FindSameKeys(Type *l, Type *r, size_t depth) {\
 69    Type *start = l;\
 70    for (l++; l < r; l++) {\
 71      if (StringSort ## CompareKeys(start, l, depth)) {\
 72        if (l - start > 1)\
 73          HandleSameKeys(start, l);\
 74        start = l;\
 75      }\
 76    }\
 77    if (r - start > 1) HandleSameKeys(start, r);\
 78  }\
 79  \
 80  /* Insertion sort for small range. */\
 81  static void StringSort ## InsertionSort(Type *l, Type *r, size_t depth) {\
 82    int equal_flag = 0;\
 83    Type *i, *j;\
 84    for (i = l + 1; i < r; i++) {\
 85      Type key = *i;\
 86      for (j = i; j > l; j--) {\
 87        int compare = StringSort ## CompareKeys(&key, j - 1, depth);\
 88        equal_flag |= (compare == 0);\
 89        if (compare >= 0) break;\
 90        *j = *(j - 1);\
 91      }\
 92      *j = key;\
 93    }\
 94    if (equal_flag) StringSort ## FindSameKeys(l, r, depth);\
 95  }\
 96  \
 97  /* Sort an array by multi-key quick sort. */\
 98  static void StringSort(Type *l, Type *r, size_t depth) {\
 99    while (r - l > 10) {\
100      Type *pl = l, *pr = r, *pivot_l = l, *pivot_r = r;\
101      unsigned char pivot;\
102      pivot = StringSort ## Median(GetLetter(l, depth),\
103                                   GetLetter(l + (r - l) / 2, depth),\
104                                   GetLetter(r - 1, depth));\
105  \
106      /* Move keys less than the pivot to left. */\
107      /* Move keys greater than the pivot to right. */\
108      for (;;) {\
109        int diff;\
110        while (pl < pr && (diff = GetLetter(pl, depth) - pivot) <= 0) {\
111          if (!diff) StringSort ## SwapKeys(pl, pivot_l++);\
112          pl++;\
113        }\
114        while (pl < pr && (diff = GetLetter(--pr, depth) - pivot) >= 0) {\
115          if (!diff) StringSort ## SwapKeys(pr, --pivot_r);\
116        }\
117        if (pl >= pr) break;\
118        StringSort ## SwapKeys(pl++, pr);\
119      }\
120      while (pivot_l > l) StringSort ## SwapKeys(--pivot_l, --pl);\
121      while (pivot_r < r) StringSort ## SwapKeys(pivot_r++, pr++);\
122  \
123      /* To avoid stack over flow. */\
124      /* Use recursive call only for the smallest range. */\
125      if (pl - l > pr - pl || r - pr > pr - pl) {\
126        if (pr - pl > 1) {\
127          if (pivot != '\0') StringSort(pl, pr, depth + 1);\
128          else HandleSameKeys(pl, pr);\
129        }\
130        if (pl - l < r - pr) {\
131          if (pl - l > 1) StringSort(l, pl, depth);\
132          l = pr;\
133        } else {\
134          if (r - pr > 1) StringSort(pr, r, depth);\
135          r = pl;\
136        }\
137      } else {\
138        if (pl - l > 1) StringSort(l, pl, depth);\
139        if (r - pr > 1) StringSort(pr, r, depth);\
140        l = pl, r = pr;\
141        if (pr - pl > 1) {\
142          if (pivot != '\0') depth++;\
143          else HandleSameKeys(pl, pr), l = r;\
144        }\
145      }\
146    }\
147    if (r - l > 1) StringSort ## InsertionSort(l, r, depth);\
148  }\
149  \
150  /* Declaration of the multi-key quick sort function. */\
151  /* Semicolon must be added after this macro because of this declaration. */\
152  static void StringSort(Type *l, Type *r, size_t depth)
153  
154  
155  // Define static functions to sort an array by using the combination of
156  // radix sort and multi-key quick sort.
157  // This macro is suitable for an array which has many keys.
158  //
159  // This macro needs similar arguments that DEFINE_SZSORT() needs.
160  // Please see the comments about DEFINE_SZSORT().
161  #define DEFINE_HSZSORT(HugeStringSort, Type, GetLetter, HandleSameKeys)\
162  \
163  /* Define functions for multi-key quick sort. */\
164  DEFINE_SZSORT(HugeStringSort ## StringSort, Type, GetLetter, HandleSameKeys);\
165  \
166  /* Calculate the bucket id from the head 2 letters. */\
167  static size_t HugeStringSort ## GetBucket(\
168      HugeStringSort ## StringSort ## Pointer key, size_t depth) {\
169    size_t id = GetLetter(key, depth);\
170    return id ? ((id << 8) + GetLetter(key, depth + 1)) : 0;\
171  }\
172  \
173  /* Sort an array which has many keys. */\
174  /* The combination of radix sort and multi-key quick sort is used. */\
175  static Type *HugeStringSort(Type *l, Type *r, size_t depth) {\
176    size_t i, num = r - l;\
177    size_t *buf, *bucket;\
178    Type *keys, *p;\
179  \
180    if ((keys = (Type *)malloc(sizeof(*keys) * num)) == NULL)\
181      return NULL;\
182    if ((buf = (size_t *)malloc(sizeof(*buf) * 65538)) == NULL) {\
183      free(keys);\
184      return NULL;\
185    }\
186  \
187    /* Divide keys to buckets by using their head 2 letters. */\
188    for (bucket = buf + 2, i = 0; i < 65538; i++)\
189      buf[i] = 0;\
190    for (p = l; p < r; p++)\
191      bucket[HugeStringSort ## GetBucket(p, depth)]++;\
192    for (bucket--, i = 2; i < 65536; i++)\
193      bucket[i] += bucket[i - 1];\
194    for (p = l; p < r; p++)\
195      keys[bucket[HugeStringSort ## GetBucket(p, depth)]++] = *p;\
196  \
197    /* Apply multi-key quick sort to each bucket. */\
198    for (bucket--, i = 0; i < 65536; ++i) {\
199      if (bucket[i + 1] - bucket[i] > 1) {\
200        if (i & 0xFF)\
201          HugeStringSort ## StringSort(keys + bucket[i],\
202                                       keys + bucket[i + 1], depth + 2);\
203        else\
204          HandleSameKeys(keys + bucket[i], keys + bucket[i + 1]);\
205      }\
206    }\
207  \
208    free(buf);\
209    return keys;\
210  }\
211  \
212  /* Declaration of the radix and multi-key quick sort function. */\
213  /* Semicolon must be added after this macro because of this declaration. */\
214  static Type *HugeStringSort(Type *l, Type *r, size_t depth)
215  
216  
217  #endif  // __SORTING_STRINGS_TERMINATED_BY_ZERO_MACROS_H_