https://sintheta.nexus/blog/feed.xml

binsearch benchmarks

2024-11-28

Binary search implementations and crude benchmarking

  • V1: iterative straightforward solution
  • V2: iterative slighly optimized solution
    • one less comparison in the loop
  • bsearch(): C stdlib's bsearch w/ custom compare()
/* Heap allocation due to large stack size needed given large N value
 *
 * $ gcc -O0 kr_0301.c -o kr_0301 && ./kr_0301 && rm kr_0301
 *
 * N: 100000000
 * V1 seconds: 26.352719
 * V2 seconds: 17.922611
 * bsearch() : 9.016424
 *
 * $ gcc -O3 kr_0301.c -o kr_0301 && ./kr_0301 && rm kr_0301
 *
 * N: 100000000
 * V1 seconds: 15.006659
 * V2 seconds: 10.209212
 * bsearch() : 4.310327
 *
 */

I wasn't surprised regarding -O0, but the fact V2 stays significantly faster even with highest performance optimizations is quite surprising. Grain of salt: Might be the case I'm not measuring what I think I'm measuring, benchmarking accurately is quite the science and the approach here not super sophisticated.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 100000000

enum VERSION { V1, V2, stdlib};

int64_t binsearchV1(int64_t x, int64_t a[], int64_t n)
{
    int64_t low, mid, high;

    low = 0;
    high = n - 1;

    while (low <= high) 
    {
        mid = (low+high) / 2;

        if (x < a[mid])
            high = mid - 1;
        else if (x > a[mid])
            low = mid + 1;
        else
            return mid;
    }

    return -1;
}

int64_t binsearchV2(int64_t x, int64_t a[], int64_t n)
{
    int64_t low, mid, high;

    low = 0;
    high = n - 1;

    while (low < high) 
    {
        mid = (low+high) / 2 + 1;

        if (x < a[mid])
            high = mid - 1;
        else
            low = mid;
    }
    /* we now only check whether x < a[mid]
     * x == a[low] (previous else) now not a return
     * we implicitly return when low == high by changing while expression `<`
     * we still have to adjust for a one-off error
     * when x >= a[mid] we `low = mid + 1` adjusting low to be one past
     * the potentially correctly found index
     * solution: remove `+1` from `low = mid + 1;` and add then computing mid
     * each beginning of a loop
     */

    return x == a[low] ? low : -1;
}

int compare(const void *a, const void *b) {
    int64_t arg1 = *(const int64_t *)a;
    int64_t arg2 = *(const int64_t *)b;

    if (arg1 < arg2) return -1;
    if (arg1 > arg2) return 1;
    return 0;
}

double benchmark(enum VERSION v, int64_t* a)
{
    clock_t start, end;
    start = clock();

    switch(v)
    {
        case V1:
            for (int64_t i = 0; i < N; i++)
                (void) binsearchV1(a[i], a, N);
        case V2:
            for (int64_t i = 0; i < N; i++)
                (void) binsearchV2(a[i], a, N);
        case stdlib:
            for (int64_t i = 0; i < N; i++)
                (void) bsearch(&a[i], a, N, sizeof(a[0]), compare);
    }

    end = clock();
    return ((double)(end - start)) / CLOCKS_PER_SEC;
}

int main(void)
{
    int64_t* a = malloc(N * sizeof(int64_t));

    for (int64_t i = 0; i < N; i++)
        a[i] = i;

    printf("N: %d\n", N);
    printf("V1 seconds: %lf\n", benchmark(V1, a));
    printf("V2 seconds: %lf\n", benchmark(V2, a));
    printf("bsearch() : %lf\n", benchmark(stdlib, a));
}

/* Heap allocation due to large stack size needed given large N value
 *
 * $ gcc -O0 kr_0301.c -o kr_0301 && ./kr_0301 && rm kr_0301
 *
 * N: 100000000
 * V1 seconds: 26.352719
 * V2 seconds: 17.922611
 * bsearch() : 9.016424
 *
 * $ gcc -O3 kr_0301.c -o kr_0301 && ./kr_0301 && rm kr_0301
 *
 * N: 100000000
 * V1 seconds: 15.006659
 * V2 seconds: 10.209212
 * bsearch() : 4.310327
 *
 */