آیا میشه کاری کرد که کد سریع تر اجرا بشه؟
یکی از چالشهایی که هر برنامه نویس با آن دست و پنجه نرم میکند زمان اجرای کدهاست و خواسته اصلی اینکه کاری کنیم تا کد سریعتر و با پرفورمنس بهتر اجرا شود. در این مقاله با هم نگاهی خواهیم انداخت به کد مرتب سازی در زبان C و آن را بهبود خواهیم داد.
·۷ دقیقه برای خواندن·algorithm

مرتب سازی در زبان C
همانطور که میدانید انواع مختلفی از الگوریتمهای مرتب سازی در برنامه نویسی به زبان های مختلف وجود دارد در این مثال ما از الگوریتم مرتب سازی quicksort استفاده میکنیم
کد اصلی این الگوریتم را در هدر فایل sort.h در زیر میبینید
// SPDX-License-Identifier: MIT
// sort.h - Branchless Quicksort
// (c) christof.kaser@gmail.com
#ifndef SORT_H
#define SORT_H
#ifndef BLQS_CMP
#define BLQS_CMP(a, b) ((a) < (b))
#endif
#include <stddef.h>
#include <string.h>
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define SMALLPART 1024
#define SWSZ 512
#define UNROLL 16
#define sort2(a, b) do { \
unsigned m = BLQS_CMP(a, b); \
BLQS_TYPE x = a; \
a = m ? a : b; \
b = m ? b : x; \
} while(0)
#define sort3(a, b, c) do { \
sort2(a, b); sort2(b, c); sort2(a, b); \
} while(0)
#define sort4(a, b, c, d) do { \
sort2(a, b); sort2(c, d); sort2(a, c); \
sort2(b, d); sort2(b, c); \
} while(0)
#define sort5(a, b, c, d, e) do { \
sort2(b, c); sort2(d, e); sort2(b, d); \
sort2(a, c); sort2(a, d); sort2(c, e); \
sort2(a, b); sort2(c, d); sort2(b, c); \
} while(0)
#define sort6(a, b, c, d, e, f) do { \
sort2(a, b); sort2(c, d); sort2(e, f); \
sort2(a, c); sort2(b, d); sort2(e, f); \
sort2(a, e); sort2(b, f); sort2(c, e); \
sort2(d, f); sort2(b, c); sort2(d, e); \
sort2(c, d); \
} while(0)
#define sort7(a, b, c, d, e, f, g) do { \
sort2(a, b); sort2(c, d); sort2(a, c); \
sort2(b, d); sort2(b, c); sort2(e, f); \
sort2(e, g); sort2(f, g); sort2(a, e); \
sort2(b, f); sort2(c, g); sort2(b, e); \
sort2(d, g); sort2(c, e); sort2(b, c); \
sort2(d, f); sort2(d, e); \
} while(0)
#define sort8(a,b,c,d,e,f,g,h) do { \
sort2(a,b); sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(a,c); sort2(b,d); sort2(e,g); sort2(f,h); \
sort2(b,c); sort2(f,g); \
sort2(a,e); sort2(b,f); sort2(c,g); sort2(d,h); \
sort2(c,e); sort2(d,f); \
sort2(b,c); sort2(d,e); sort2(f,g); \
} while (0)
#define sort9(a,b,c,d,e,f,g,h,i) do { \
sort2(a,d); sort2(b,h); sort2(c,f); sort2(e,i); \
sort2(a,h); sort2(c,e); sort2(d,i); sort2(f,g); \
sort2(a,c); sort2(b,d); sort2(e,f); sort2(h,i); \
sort2(b,e); sort2(d,g); sort2(f,h); \
sort2(a,b); sort2(c,e); sort2(d,f); sort2(g,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(b,c); sort2(d,e); sort2(f,g); \
} while (0)
#define sort10(a,b,c,d,e,f,g,h,i,j) do { \
sort2(a,i); sort2(b,j); sort2(c,h); sort2(d,f); sort2(e,g); \
sort2(a,c); sort2(b,e); sort2(f,i); sort2(h,j); \
sort2(a,d); sort2(c,e); sort2(f,h); sort2(g,j); \
sort2(a,b); sort2(d,g); sort2(i,j); \
sort2(b,f); sort2(c,d); sort2(e,i); sort2(g,h); \
sort2(b,c); sort2(d,f); sort2(e,g); sort2(h,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(d,e); sort2(f,g); \
} while (0)
#define sort11(a,b,c,d,e,f,g,h,i,j,k) do { \
sort2(a,j); sort2(b,g); sort2(c,e); sort2(d,h); sort2(f,i); \
sort2(a,b); sort2(d,f); sort2(e,k); sort2(g,j); sort2(h,i); \
sort2(b,d); sort2(c,f); sort2(e,h); sort2(i,k); \
sort2(a,e); sort2(b,c); sort2(d,h); sort2(f,j); sort2(g,i); \
sort2(a,b); sort2(c,g); sort2(e,f); sort2(h,i); sort2(j,k); \
sort2(c,e); sort2(d,g); sort2(f,h); sort2(i,j); \
sort2(b,c); sort2(d,e); sort2(f,g); sort2(h,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
} while (0)
#define sort12(a,b,c,d,e,f,g,h,i,j,k,l) do { \
sort2(a,i); sort2(b,h); sort2(c,g); sort2(d,l); sort2(e,k); sort2(f,j); \
sort2(a,c); sort2(b,e); sort2(d,f); sort2(g,i); sort2(h,k); sort2(j,l); \
sort2(a,b); sort2(c,j); sort2(e,h); sort2(f,g); sort2(k,l); \
sort2(b,d); sort2(c,h); sort2(e,j); sort2(i,k); \
sort2(a,b); sort2(c,d); sort2(e,f); sort2(g,h); sort2(i,j); sort2(k,l); \
sort2(b,c); sort2(d,f); sort2(g,i); sort2(j,k); \
sort2(c,e); sort2(d,g); sort2(f,i); sort2(h,j); \
sort2(b,c); sort2(d,e); sort2(f,g); sort2(h,i); sort2(j,k); \
} while (0)
static void sorting_network(BLQS_TYPE* l, int partszm1_min_1) {
switch (partszm1_min_1) {
case 0: break;
case 1: sort2(l[0],l[1]); break;
case 2: sort3(l[0],l[1],l[2]); break;
case 3: sort4(l[0],l[1],l[2],l[3]); break;
case 4: sort5(l[0],l[1],l[2],l[3],l[4]); break;
case 5: sort6(l[0],l[1],l[2],l[3],l[4],l[5]); break;
case 6: sort7(l[0],l[1],l[2],l[3],l[4],l[5],l[6]); break;
case 7: sort8(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7]); break;
case 8: sort9(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8]); break;
case 9: sort10(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9]); break;
case 10: sort11(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9],l[10]); break;
case 11: sort12(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9],l[10],l[11]); break;
}
}
#define med5(a,b,c,d,e) do { \
sort2(a,b); sort2(c,d); sort2(a,c); \
sort2(b,d); sort2(b,c); sort2(c,e); \
sort2(b,c); \
} while(0)
static BLQS_TYPE* partition_small(BLQS_TYPE* left, BLQS_TYPE* right) {
BLQS_TYPE* outerleft = left;
BLQS_TYPE* pivp = left + 6;
BLQS_TYPE piv = *pivp;
BLQS_TYPE l1 = left[1],l2 = left[2];
BLQS_TYPE r1 = right[-1], r0 = *right;
med5(l1, l2, piv, r1, r0);
left[1] = l1; left[2] = l2;
right[-1] = r1; *right = r0;
left += 3;
right -= 2;
*pivp = *outerleft;
BLQS_TYPE swbuf[SMALLPART];
BLQS_TYPE* sw = swbuf;
BLQS_TYPE* lwr = left;
while (right - left >= UNROLL) for (int i = UNROLL; i--;) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *sw = x; sw++; }
}
while (left <= right) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *sw = x; sw++; }
}
memcpy(lwr, swbuf, (sw - swbuf) * sizeof(BLQS_TYPE));
lwr -= 1;
*outerleft = *lwr;
*lwr = piv;
return lwr;
}
static BLQS_TYPE* partition(BLQS_TYPE* left, BLQS_TYPE* right) {
BLQS_TYPE* outerleft = left;
BLQS_TYPE* pivp = left + (right - left) / 2;
BLQS_TYPE piv = *pivp;
med5(left[1],left[2],left[3],left[4],left[5]);
med5(left[11],left[12],left[13],left[14],left[15]);
med5(pivp[-2], pivp[-1], piv, pivp[1], pivp[2]);
med5(right[-14], right[-13], right[-12], right[-11], right[-10]);
med5(right[-4], right[-3], right[-2], right[-1], right[0]);
med5(left[3], left[13], piv, right[-12], right[-2]);
left += 1;
*pivp = *outerleft;
BLQS_TYPE swbuf[SWSZ];
BLQS_TYPE *rwr = right, *sw = swbuf;
BLQS_TYPE *lwr = left;
while (UNROLL < SWSZ - (sw - swbuf) && left < right - UNROLL) {
ptrdiff_t avail = min(right - left, SWSZ - (sw - swbuf));
BLQS_TYPE* endp = right - avail;
while (right > endp + UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) { *sw = x; sw++; }
else { *rwr = x; rwr--; }
}
}
}
while (right - left >= UNROLL &&
(rwr - right > UNROLL || left - lwr > UNROLL)) {
while (rwr - right > UNROLL && right - left >= UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *rwr = x; rwr--; }
}
}
while (left - lwr > UNROLL && right - left >= UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *rwr = x; rwr--; }
}
}
}
do {
while (rwr > right && left <= right) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *rwr = x; rwr--; }
}
while (lwr < left && left <= right) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) { *lwr = x; lwr++; }
else { *rwr = x; rwr--; }
}
} while ((lwr < left||rwr > right) && left <= right);
while (left <= right && !BLQS_CMP(*right, piv)) {
right--;
rwr--;
}
memcpy(lwr, swbuf, (sw - swbuf) * sizeof(BLQS_TYPE));
*outerleft = *rwr;
*rwr = piv;
return rwr;
}
static void smallsort(BLQS_TYPE* left, BLQS_TYPE* right) {
while (right - left > 11) {
BLQS_TYPE* mid = partition_small(left, right);
smallsort(left, mid - 1);
left = mid + 1;
}
sorting_network(left, right - left);
}
static void sortr(BLQS_TYPE* left, BLQS_TYPE* right) {
while (1) {
ptrdiff_t partszm1 = right - left;
if (partszm1 <= SMALLPART) break;
BLQS_TYPE* mid = partition(left, right);
if (mid - left < partszm1 / 16) {
if (mid > left) sortr(left, mid - 1);
BLQS_TYPE piv = *mid;
mid += 1;
// collect duplicates
for (BLQS_TYPE* p = mid; p <= right; p++) {
if (!BLQS_CMP(piv, *p)) {
BLQS_TYPE h = *mid;
*mid = *p;
*p = h;
mid++;
}
}
left = mid;
if (right - left < SMALLPART) break;
mid = partition(left, right);
}
if (mid - left < right - mid) {
sortr(left, mid - 1);
left = mid + 1;
}
else {
sortr(mid + 1, right);
right = mid - 1;
}
}
smallsort(left, right);
}
static void sort(BLQS_TYPE* data, int len) {
if (len < 2) return;
for (BLQS_TYPE* p = data + 1; p < data + len; p++) {
if (BLQS_CMP(*p, *(p - 1))) goto not_sorted;
}
return;
not_sorted:
sortr(data, data + len - 1);
}
#endif
قبل از ادامه اجازه بدید توضیح بدیم چطور از این هدر فایل در کد sort.c استفاده کنیم
// SPDX-License-Identifier: MIT
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#define BLQS_CMP(a, b) ((a) < (b))
#define BLQS_TYPE double
#include "sort.h"
#define SIZE 50000000
double data[SIZE];
double ts(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec / 1000000.0;
}
int main() {
double t0;
for (int i = 0; i < SIZE; i++) data[i] = rand() / 1024.0;
t0 = ts();
sort(data, SIZE);
printf("Time: %.2fs\n", ts() - t0);
}
زمان اجرای این کد در یک کامپیوتر macOS/M1 حدودا 4.39 ثانیه برای مرتب کردن ۵۰ میلیون عدد double است.
در ادامه ما کد sort.h را کمی تغییر میدیم و به شکل زیر بازنویسی میکنیم
// SPDX-License-Identifier: MIT
// blqsort.h - Branchless Quicksort
// (c) christof.kaser@gmail.com
#ifndef SORT_H
#define SORT_H
#ifndef BLQS_CMP
#define BLQS_CMP(a, b) ((a) < (b))
#endif
#include <stddef.h>
#include <string.h>
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define SMALLPART 2048
#define SWSZ 512
#define UNROLL 16
#define sort2(a, b) do { \
unsigned m = BLQS_CMP(a, b); \
BLQS_TYPE x = a; \
a = m ? a : b; \
b = m ? b : x; \
} while(0)
#define sort3(a, b, c) do { \
sort2(a, b); sort2(b, c); sort2(a, b); \
} while(0)
#define sort4(a, b, c, d) do { \
sort2(a, b); sort2(c, d); sort2(a, c); \
sort2(b, d); sort2(b, c); \
} while(0)
#define sort5(a, b, c, d, e) do { \
sort2(b, c); sort2(d, e); sort2(b, d); \
sort2(a, c); sort2(a, d); sort2(c, e); \
sort2(a, b); sort2(c, d); sort2(b, c); \
} while(0)
#define sort6(a, b, c, d, e, f) do { \
sort2(a, b); sort2(c, d); sort2(e, f); \
sort2(a, c); sort2(b, d); sort2(e, f); \
sort2(a, e); sort2(b, f); sort2(c, e); \
sort2(d, f); sort2(b, c); sort2(d, e); \
sort2(c, d); \
} while(0)
#define sort7(a, b, c, d, e, f, g) do { \
sort2(a, b); sort2(c, d); sort2(a, c); \
sort2(b, d); sort2(b, c); sort2(e, f); \
sort2(e, g); sort2(f, g); sort2(a, e); \
sort2(b, f); sort2(c, g); sort2(b, e); \
sort2(d, g); sort2(c, e); sort2(b, c); \
sort2(d, f); sort2(d, e); \
} while(0)
#define sort8(a,b,c,d,e,f,g,h) do { \
sort2(a,b); sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(a,c); sort2(b,d); sort2(e,g); sort2(f,h); \
sort2(b,c); sort2(f,g); \
sort2(a,e); sort2(b,f); sort2(c,g); sort2(d,h); \
sort2(c,e); sort2(d,f); \
sort2(b,c); sort2(d,e); sort2(f,g); \
} while (0)
#define sort9(a,b,c,d,e,f,g,h,i) do { \
sort2(a,d); sort2(b,h); sort2(c,f); sort2(e,i); \
sort2(a,h); sort2(c,e); sort2(d,i); sort2(f,g); \
sort2(a,c); sort2(b,d); sort2(e,f); sort2(h,i); \
sort2(b,e); sort2(d,g); sort2(f,h); \
sort2(a,b); sort2(c,e); sort2(d,f); sort2(g,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(b,c); sort2(d,e); sort2(f,g); \
} while (0)
#define sort10(a,b,c,d,e,f,g,h,i,j) do { \
sort2(a,i); sort2(b,j); sort2(c,h); sort2(d,f); sort2(e,g); \
sort2(a,c); sort2(b,e); sort2(f,i); sort2(h,j); \
sort2(a,d); sort2(c,e); sort2(f,h); sort2(g,j); \
sort2(a,b); sort2(d,g); sort2(i,j); \
sort2(b,f); sort2(c,d); sort2(e,i); sort2(g,h); \
sort2(b,c); sort2(d,f); sort2(e,g); sort2(h,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
sort2(d,e); sort2(f,g); \
} while (0)
#define sort11(a,b,c,d,e,f,g,h,i,j,k) do { \
sort2(a,j); sort2(b,g); sort2(c,e); sort2(d,h); sort2(f,i); \
sort2(a,b); sort2(d,f); sort2(e,k); sort2(g,j); sort2(h,i); \
sort2(b,d); sort2(c,f); sort2(e,h); sort2(i,k); \
sort2(a,e); sort2(b,c); sort2(d,h); sort2(f,j); sort2(g,i); \
sort2(a,b); sort2(c,g); sort2(e,f); sort2(h,i); sort2(j,k); \
sort2(c,e); sort2(d,g); sort2(f,h); sort2(i,j); \
sort2(b,c); sort2(d,e); sort2(f,g); sort2(h,i); \
sort2(c,d); sort2(e,f); sort2(g,h); \
} while (0)
#define sort12(a,b,c,d,e,f,g,h,i,j,k,l) do { \
sort2(a,i); sort2(b,h); sort2(c,g); sort2(d,l); sort2(e,k); sort2(f,j); \
sort2(a,c); sort2(b,e); sort2(d,f); sort2(g,i); sort2(h,k); sort2(j,l); \
sort2(a,b); sort2(c,j); sort2(e,h); sort2(f,g); sort2(k,l); \
sort2(b,d); sort2(c,h); sort2(e,j); sort2(i,k); \
sort2(a,b); sort2(c,d); sort2(e,f); sort2(g,h); sort2(i,j); sort2(k,l); \
sort2(b,c); sort2(d,f); sort2(g,i); sort2(j,k); \
sort2(c,e); sort2(d,g); sort2(f,i); sort2(h,j); \
sort2(b,c); sort2(d,e); sort2(f,g); sort2(h,i); sort2(j,k); \
} while (0)
static void sorting_network(BLQS_TYPE* l, int partszm1_min_1) {
switch (partszm1_min_1) {
case 0: break;
case 1: sort2(l[0],l[1]); break;
case 2: sort3(l[0],l[1],l[2]); break;
case 3: sort4(l[0],l[1],l[2],l[3]); break;
case 4: sort5(l[0],l[1],l[2],l[3],l[4]); break;
case 5: sort6(l[0],l[1],l[2],l[3],l[4],l[5]); break;
case 6: sort7(l[0],l[1],l[2],l[3],l[4],l[5],l[6]); break;
case 7: sort8(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7]); break;
case 8: sort9(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8]); break;
case 9: sort10(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9]); break;
case 10: sort11(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9],l[10]); break;
case 11: sort12(l[0],l[1],l[2],l[3],l[4],l[5],l[6],l[7],l[8],l[9],l[10],l[11]); break;
}
}
#define med5(a,b,c,d,e) do { \
sort2(a,b); sort2(c,d); sort2(a,c); \
sort2(b,d); sort2(b,c); sort2(c,e); \
sort2(b,c); \
} while(0)
static BLQS_TYPE* partition_small(BLQS_TYPE* left, BLQS_TYPE* right) {
BLQS_TYPE* outerleft = left;
BLQS_TYPE* pivp = left + 6;
BLQS_TYPE piv = *pivp;
BLQS_TYPE l1 = left[1],l2 = left[2];
BLQS_TYPE r1 = right[-1], r0 = *right;
med5(l1, l2, piv, r1, r0);
left[1] = l1; left[2] = l2;
right[-1] = r1; *right = r0;
left += 3;
right -= 2;
*pivp = *outerleft;
BLQS_TYPE swbuf[SMALLPART];
BLQS_TYPE* sw = swbuf;
BLQS_TYPE* lwr = left;
while (right - left >= UNROLL) for (int i = UNROLL; i--;) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *sw++ = x;
}
while (left <= right) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *sw++ = x;
}
memcpy(lwr, swbuf, (sw - swbuf) * sizeof(BLQS_TYPE));
lwr -= 1;
*outerleft = *lwr;
*lwr = piv;
return lwr;
}
static BLQS_TYPE* partition(BLQS_TYPE* left, BLQS_TYPE* right) {
BLQS_TYPE* outerleft = left;
BLQS_TYPE* pivp = left + (right - left) / 2;
BLQS_TYPE piv = *pivp;
med5(left[1],left[2],left[3],left[4],left[5]);
med5(left[11],left[12],left[13],left[14],left[15]);
med5(pivp[-2], pivp[-1], piv, pivp[1], pivp[2]);
med5(right[-14], right[-13], right[-12], right[-11], right[-10]);
med5(right[-4], right[-3], right[-2], right[-1], right[0]);
med5(left[3], left[13], piv, right[-12], right[-2]);
left += 1;
*pivp = *outerleft;
BLQS_TYPE swbuf[SWSZ];
BLQS_TYPE *rwr = right, *sw = swbuf;
BLQS_TYPE *lwr = left;
while (UNROLL < SWSZ - (sw - swbuf) && left < right - UNROLL) {
ptrdiff_t avail = min(right - left, SWSZ - (sw - swbuf));
BLQS_TYPE* endp = right - avail;
while (right > endp + UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) *sw++ = x;
else *rwr-- = x;
}
}
}
while (right - left >= UNROLL &&
(rwr - right > UNROLL || left - lwr > UNROLL)) {
while (rwr - right > UNROLL && right - left >= UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *rwr-- = x;
}
}
while (left - lwr > UNROLL && right - left >= UNROLL) {
for (int i = UNROLL; i--;) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *rwr-- = x;
}
}
}
do {
while (rwr > right && left <= right) {
BLQS_TYPE x = *left++;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *rwr-- = x;
}
while (lwr < left && left <= right) {
BLQS_TYPE x = *right--;
if (BLQS_CMP(x, piv)) *lwr++ = x;
else *rwr-- = x;
}
} while ((lwr < left||rwr > right) && left <= right);
while (left <= right && !BLQS_CMP(*right, piv)) {
right--;
rwr--;
}
memcpy(lwr, swbuf, (sw - swbuf) * sizeof(BLQS_TYPE));
*outerleft = *rwr;
*rwr = piv;
return rwr;
}
static void smallsort(BLQS_TYPE* left, BLQS_TYPE* right) {
while (right - left > 11) {
BLQS_TYPE* mid = partition_small(left, right);
smallsort(left, mid - 1);
left = mid + 1;
}
sorting_network(left, right - left);
}
static void sortr(BLQS_TYPE* left, BLQS_TYPE* right) {
while (1) {
ptrdiff_t partszm1 = right - left;
if (partszm1 <= SMALLPART) break;
BLQS_TYPE* mid = partition(left, right);
if (mid - left < partszm1 / 16) {
if (mid > left) sortr(left, mid - 1);
BLQS_TYPE piv = *mid;
mid += 1;
// collect duplicates
for (BLQS_TYPE* p = mid; p <= right; p++) {
if (!BLQS_CMP(piv, *p)) {
BLQS_TYPE h = *mid;
*mid = *p;
*p = h;
mid++;
}
}
left = mid;
if (right - left < SMALLPART) break;
mid = partition(left, right);
}
if (mid - left < right - mid) {
sortr(left, mid - 1);
left = mid + 1;
}
else {
sortr(mid + 1, right);
right = mid - 1;
}
}
smallsort(left, right);
}
static void sort(BLQS_TYPE* data, int len) {
if (len < 2) return;
for (BLQS_TYPE* p = data + 1; p < data + len; p++) {
if (BLQS_CMP(*p, *(p - 1))) goto not_sorted;
}
return;
not_sorted:
sortr(data, data + len - 1);
}
#endif