In the previous post we showed how to use KLEE to generate tests for a simple program using integer variables. In this post we will demonstrate how to use KLEE with arrays and assumptions.

Let’s look at the following implementation of binary search:

int binary_search(int arr[], int size, int target) {
    print_data(arr, size, target);
    int low = 0;
    int high = size - 1;
    int mid;
    while (low <= high) {
        mid = (low + high)/2;
        if (arr[mid] == target) {
            return mid;
        }
        if (arr[mid] < target) {
            low = mid + 1;
        }
        if (arr[mid] > target) {
            high = mid - 1;
        }
    }
    return -1;
}

To check this function, we can create a C file as follows (bin_search0.c):

#include <klee/klee.h>
#include <stdio.h>
#include <assert.h>

void print_data(int arr[], int size, int target) {
    printf("searching for %d in:\n[", target);
    for (int i=0; i < size-1; i++) {
        printf("%d, ", arr[i]);
    }
    printf("%d]\n", arr[size-1]);
}

int binary_search(int arr[], int size, int target) {
    print_data(arr, size, target);
    int low = 0;
    int high = size - 1;
    int mid;
    while (low <= high) {
        mid = (low + high)/2;
        if (arr[mid] == target) {
            return mid;
        }
        if (arr[mid] < target) {
            low = mid + 1;
        }
        if (arr[mid] > target) {
            high = mid - 1;
        }
    }
    return -1;
}

int main() {
  int a[10];
  int x;
  klee_make_symbolic(&a, sizeof(a), "a");
  klee_make_symbolic(&x, sizeof(x), "x");
  int result = binary_search(a, 10, x);
  printf("result = %d\n", result);
  // check correctness
  if (result != -1) {
    assert(a[result] == x);
  } else {
    // if result == -1, then we didn't find it. Therefore, it shouldn't be in the array
    for (int i = 0; i < 10; i++) {
      assert(a[i] != x);
    }
  }
  return 1;
}

In the code above, we are declaring a symbolic array “a” that contains 10 elements; a symbolic int variable to represent the value to search, and call to the binary_search function. If the result is different from -1, then the value of the array at that index should be equal to the value we are searching. Otherwise, no element should be equal to the value we are searching.

We will now run the commands as explained in the previous post:

docker run -v /path_to_klee_dir/klee:/tmp/klee --rm -ti --ulimit='stack=-1:-1' klee/klee:2.1
cd /tmp/klee
clang -I ~/klee_src/include/  -emit-llvm -c -g -O0 -Xclang -disable-O0-optnone bin_search0.c
klee --external-calls=all bin_search0.bc

We obtain the following output:

The line in red shows that there is a failed assertion!

Let’s find the input that causes the assertion to fail:

export LD_LIBRARY_PATH=/home/klee/klee_build/lib/:$LD_LIBRARY_PATH
gcc -I ~/klee_src/include -L ~/klee_build/lib/  bin_search0.c -lkleeRuntest
ls -1 klee-last/*.ktest | awk '{printf("echo \"----------------\"\n echo %s\n KTEST_FILE=%s ./a.out\n", $0, $0)}' | sh

This is the output (I’m leaving only the lines before and after the assertion fail):

In test000008.ktest, we are looking for the value 0 in the array, but the binary search function says that the value is not present.

The array is not sorted!

We want KLEE to check binary search assuming that the array is sorted. Luckily for us, there is a function klee_assume that allows exactly that.

Let’s consider this new version (bin_search1.c):

#include <klee/klee.h>
#include <stdio.h>
#include <assert.h>

void print_data(int arr[], int size, int target) {
    printf("searching for %d in:\n[", target);
    for (int i=0; i < size-1; i++) {
        printf("%d, ", arr[i]);
    }
    printf("%d]\n", arr[size-1]);
}

int binary_search(int arr[], int size, int target) {
    print_data(arr, size, target);
    int low = 0;
    int high = size - 1;
    int mid;
    while (low <= high) {
        mid = (low + high)/2;
        if (arr[mid] == target) {
            return mid;
        }
        if (arr[mid] < target) {
            low = mid + 1;
        }
        if (arr[mid] > target) {
            high = mid - 1;
        }
    }
    return -1;
}

int sorted(int arr[], int size) {
    for (int i = 0; i < size-1; i++) {
        if (arr[i] > arr[i+1]) {
            return 0;
        }
    }
    return 1;
}

int main() {
  unsigned short n;
  klee_make_symbolic(&n, sizeof(n), "n");
  klee_assume(n > 0);
  klee_assume(n < 65535);
  int a[n];
  int x;
  klee_make_symbolic(&a, sizeof(a), "a");
  klee_assume(sorted(a, 10));
  klee_make_symbolic(&x, sizeof(x), "x");
  int result = binary_search(a, 10, x);
  printf("result = %d\n", result);
  if (result != -1) {
    assert(a[result] == x);
  } else {
    // if result == -1, then we didn't find it. Therefore, it shouldn't be in the array
    for (int i = 0; i < 10; i++) {
      assert(a[i] != x);
    }
  }
  return 1;
}

Line 50 contains the line indicating that KLEE should assume that the array is sorted, and lines 33 to 40 implement the check. However, we obtain the following output:

“invalid klee_assume call (provably false”

If we run the tests as before, we see that the assertion always holds and the test input arrays are indeed sorted.

gcc -I ~/klee_src/include -L ~/klee_build/lib/  bin_search0.c -lkleeRuntest
ls -1 klee-last/*.ktest | awk '{printf("echo \"----------------\"\n echo %s\n KTEST_FILE=%s ./a.out\n", $0, $0)}' | sh

However, if you want to make sure that KLEE doesn’t output this kind of error, you can convert the function “sorted” to always exit at the end; and avoid boolean logic which in turn may cause different compilers to apply short-circuit logic and output different results.

Here is a new version of the file (bin_search.c):

#include <klee/klee.h>
#include <stdio.h>
#include <assert.h>

void print_data(int arr[], int size, int target) {
    printf("searching for %d in:\n[", target);
    for (int i=0; i < size-1; i++) {
        printf("%d, ", arr[i]);
    }
    printf("%d]\n", arr[size-1]);
}

int binary_search(int arr[], int size, int target) {
    print_data(arr, size, target);
    int low = 0;
    int high = size - 1;
    int mid;
    while (low <= high) {
        mid = (low + high)/2;
        if (arr[mid] == target) {
            return mid;
        }
        if (arr[mid] < target) {
            low = mid + 1;
        }
        if (arr[mid] > target) {
            high = mid - 1;
        }
    }
    return -1;
}

int sorted(int arr[], int size) {
    int found_error = 0;
    for (int i = 0; i < size-1; i++) {
        found_error = found_error | (arr[i] > arr[i+1]);
    }
    return !found_error;
}

int main() {
  int a[10];
  int x;
  klee_make_symbolic(&a, sizeof(a), "a");
  klee_assume(sorted(a, 10));
  klee_make_symbolic(&x, sizeof(x), "x");
  int result = binary_search(a, 10, x);
  printf("result = %d\n", result);
  if (result != -1) {
    assert(a[result] == x);
  } else {
    // if result == -1, then we didn't find it. Therefore, it shouldn't be in the array
    for (int i = 0; i < 10; i++) {
      assert(a[i] != x);
    }
  }  return 1;
}
No errors!

In particular, we can see that there are test cases for each possible location where the element may be found:

ls -1 klee-last/*.ktest | awk '{printf("KTEST_FILE=%s ./a.out\n", $0)}' | sh  | grep result | sort

This way, we succeeded in checking the correctness of binary search (for an array of 10 elements).

To play with these ideas, you can try inserting bugs or making changes to the code.

For example, what happens if in the while loop we change <= with < ?

while (low <= high)