Bugfix + added M17 decoder to the linux CI

2025-10-11 00:09:55 +02:00 · 2021-10-02 17:01:23 +02:00
parent 26fa23c8f5
commit b4213ea049
86 changed files with 6601 additions and 20 deletions
--- a/core/libcorrect/tools/CMakeLists.txt
+++ b/core/libcorrect/tools/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Reed-Solomon primitive polynomial finder; has no optional dependencies.
+add_executable(rs_find_primitive_poly EXCLUDE_FROM_ALL find_rs_primitive_poly.c)
+target_link_libraries(rs_find_primitive_poly correct_static)
+list(APPEND all_tools rs_find_primitive_poly)
+
+# The libfec polynomial finder decodes with libfec, so it only exists when
+# libfec was found.
+if(HAVE_LIBFEC)
+    add_executable(conv_find_libfec_poly EXCLUDE_FROM_ALL find_conv_libfec_poly.c)
+    target_link_libraries(conv_find_libfec_poly correct_static fec)
+    list(APPEND all_tools conv_find_libfec_poly)
+endif()
+
+# Both optimiser tools embed the error simulator object files; use the SSE
+# build of the simulator when SSE is available, the portable one otherwise.
+if(HAVE_SSE)
+    set(error_sim_objects $<TARGET_OBJECTS:error_sim_sse>)
+else()
+    set(error_sim_objects $<TARGET_OBJECTS:error_sim>)
+endif()
+
+foreach(optim_variant optim_poly optim_poly_annealing)
+    add_executable(conv_find_${optim_variant} EXCLUDE_FROM_ALL find_conv_${optim_variant}.c ${error_sim_objects})
+    target_link_libraries(conv_find_${optim_variant} correct_static)
+    list(APPEND all_tools conv_find_${optim_variant})
+endforeach()
+
+# Umbrella target: `make tools` builds every tool registered above.
+add_custom_target(tools DEPENDS ${all_tools})
--- a/core/libcorrect/tools/find_conv_libfec_poly.c
+++ b/core/libcorrect/tools/find_conv_libfec_poly.c
@@ -0,0 +1,279 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <stddef.h>
+#include <assert.h>
+
+#include <correct.h>
+#include <fec.h>
+
+// this program allows us to find all of the polynomials that come with libfec
+// this way, we can provide compatibility with libfec-encoded streams and vice versa
+// we can do this without directly copy-pasting from libfec's source, thanks
+//   to this finder
+
+// Bundles one libfec Viterbi decoder instance with the entry points used to
+// drive it, so the search code can treat every libfec decoder uniformly.
+typedef struct {
+    // decoder handle; passed as the first argument to init/update/chainback
+    void *vit;
+    // passed as the last argument of update()
+    int update_len;
+    int (*init)(void *, int);
+    int (*update)(void *, unsigned char *, int);
+    int (*chainback)(void *, unsigned char *, unsigned int, unsigned int);
+} libfec_decoder_t;
+
+// Expand a packed bit string into one byte per bit, most significant bit of
+// each input byte first. A set bit becomes 255 and a clear bit becomes 0.
+//   bytes:  packed input, at least ceil(n_bits / 8) bytes
+//   bits:   output, at least n_bits bytes
+//   n_bits: number of bits to expand
+void byte2bit(uint8_t *bytes, uint8_t *bits, size_t n_bits) {
+    for (size_t bit = 0; bit < n_bits; bit++) {
+        // select bit (7 - bit % 8) of the byte that holds this position
+        uint8_t mask = (uint8_t)(0x80u >> (bit % 8));
+        if (bytes[bit / 8] & mask) {
+            bits[bit] = 255;
+        } else {
+            bits[bit] = 0;
+        }
+    }
+}
+
+// Grow or shrink a polynomial list so that it can hold `cap` entries.
+// Returns whatever realloc() returns; passing NULL allocates a new list.
+correct_convolutional_polynomial_t *resize_poly_list(correct_convolutional_polynomial_t *polys, size_t cap) {
+    size_t n_bytes = cap * sizeof(correct_convolutional_polynomial_t);
+    return realloc(polys, n_bytes);
+}
+
+// Find every candidate value for one coefficient of the polynomial that is
+// "baked in" to a libfec decoder.
+//
+// We encode `msg` with libcorrect using each possible value for coefficient
+// `search_coeff`, erase the output symbols belonging to all other
+// coefficients, and let libfec decode the result. Every value for which
+// libfec reproduces `msg` is a candidate.
+//
+//   rate, order:  parameters of the convolutional code under test
+//   msg, msg_len: payload to encode (msg_len in bytes)
+//   libfec:       decoder to test against
+//   polys_dest:   receives a malloc'd, ascending list of matching values, or
+//                 NULL when nothing matched; the caller frees it
+//   polys_len:    receives the number of entries in *polys_dest
+//   search_coeff: index (0 .. rate-1) of the coefficient being searched
+void find_poly_coeff(size_t rate, size_t order, uint8_t *msg, size_t msg_len, libfec_decoder_t libfec, correct_convolutional_polynomial_t **polys_dest, size_t *polys_len, size_t search_coeff) {
+    // temp poly (this will be the one we search with)
+    correct_convolutional_polynomial_t *poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
+
+    // what's the largest coefficient value we'll test?
+    correct_convolutional_polynomial_t maxcoeff = (1 << order) - 1;
+
+    // we start about half way in: this value has both the highest and the
+    // lowest order bit set. a coefficient without those bits would really be
+    // of a smaller order than the one we were asked about
+    correct_convolutional_polynomial_t startcoeff = (1 << (order - 1)) + 1;
+
+    // only poly[search_coeff] matters, but set all of them to something valid
+    for (size_t i = 0; i < rate; i++) {
+        poly[i] = startcoeff;
+    }
+
+    // create a dummy encoder so that we can find how long the encoded value is
+    correct_convolutional *conv_dummy = correct_convolutional_create(rate, order, poly);
+    size_t enclen_bits = correct_convolutional_encode_len(conv_dummy, msg_len);
+    size_t enclen = (enclen_bits % 8) ? (enclen_bits / 8 + 1) : enclen_bits / 8;
+    correct_convolutional_destroy(conv_dummy);
+
+    // compact encoded format (this comes from libcorrect)
+    uint8_t *encoded = malloc(enclen * sizeof(uint8_t));
+    // soft encoded format (this goes to libfec, one byte per bit)
+    uint8_t *encoded_bits = malloc(enclen * 8 * sizeof(uint8_t));
+    // resulting decoded message which we'll compare to our given payload
+    uint8_t *msg_cmp = malloc(msg_len * sizeof(uint8_t));
+
+    // list of coefficient values which yielded correct decodings; there could
+    // be 0, 1, or more than 1. it is grown by doubling as matches are found
+    size_t polys_cap = 1;
+    *polys_len = 0;
+    correct_convolutional_polynomial_t *polys = resize_poly_list(NULL, polys_cap);
+
+    // we go by 2 because we want the lowest order bit to stay set
+    for (correct_convolutional_polynomial_t coeff = startcoeff; coeff <= maxcoeff; coeff += 2) {
+        poly[search_coeff] = coeff;
+        correct_convolutional *conv = correct_convolutional_create(rate, order, poly);
+
+        correct_convolutional_encode(conv, (uint8_t*)msg, msg_len, encoded);
+        // byte2bit() takes a count of bits, not bytes: expand the whole
+        // encoded buffer so that libfec never reads uninitialized symbols
+        byte2bit(encoded, encoded_bits, enclen * 8);
+
+        // now erase all the bits we're not searching for
+        for (size_t bit = 0; bit < msg_len * 8; bit++) {
+            for (size_t j = 0; j < rate; j++) {
+                if (j != search_coeff) {
+                    // 128 is a soft erasure
+                    encoded_bits[bit * rate + j] = 128;
+                }
+            }
+        }
+
+        libfec.init(libfec.vit, 0);
+        libfec.update(libfec.vit, encoded_bits, libfec.update_len);
+        libfec.chainback(libfec.vit, msg_cmp, 8 * msg_len, 0);
+
+        correct_convolutional_destroy(conv);
+
+        if (memcmp(msg_cmp, msg, msg_len) == 0) {
+            // match found; make room if the list is full
+            if (*polys_len == polys_cap) {
+                polys = resize_poly_list(polys, polys_cap * 2);
+                polys_cap *= 2;
+            }
+            polys[*polys_len] = coeff;
+            *polys_len = *polys_len + 1;
+        }
+    }
+
+    if (*polys_len == 0) {
+        // realloc(ptr, 0) is implementation-defined, so hand back NULL
+        // explicitly when nothing matched
+        free(polys);
+        polys = NULL;
+    } else {
+        // trim the list to the number of matches
+        polys = resize_poly_list(polys, *polys_len);
+    }
+    *polys_dest = polys;
+    free(poly);
+    free(msg_cmp);
+    free(encoded);
+    free(encoded_bits);
+}
+
+// payload length in bytes used for every search in this program.
+// we choose 2 bytes because we need a payload that's longer than
+// the shift register under test. since that includes an order 15
+// s.r., we need at least 15 bits.
+size_t msg_len = 2;
+
+// Find the complete set of coefficients that are "baked in" to one libfec
+// decoder, print them, and store them in poly[0 .. rate-1].
+//
+// For each coefficient we generate random msg_len-byte payloads and hand them
+// to find_poly_coeff. An empty result is skipped. Otherwise we intersect the
+// returned candidates with the candidates seen so far, and we are finished
+// with that coefficient once exactly one candidate remains. If an
+// intersection comes out empty, the search for that coefficient restarts.
+//
+//   rate, order: parameters of the convolutional code
+//   libfec:      decoder to probe
+//   poly:        output, must have room for `rate` entries
+void find_poly(size_t rate, size_t order, libfec_decoder_t libfec, correct_convolutional_polynomial_t *poly) {
+    uint8_t msg[msg_len];
+
+    // list returned to us by find_poly_coeff, and its length
+    correct_convolutional_polynomial_t *polys;
+    size_t polys_len;
+
+    printf("rate 1/%zu order %zu poly:", rate, order);
+
+    for (size_t search_coeff = 0; search_coeff < rate; search_coeff++) {
+        // candidates that survived every payload tried so far
+        correct_convolutional_polynomial_t *fit = NULL;
+        size_t fit_len = 0;
+        bool done = false;
+
+        while (!done) {
+            for (size_t i = 0; i < msg_len; i++) {
+                msg[i] = rand() % 256;
+            }
+            find_poly_coeff(rate, order, msg, msg_len, libfec, &polys, &polys_len, search_coeff);
+
+            if (polys_len == 0) {
+                // none fit (this is a special case): release the empty list
+                // before retrying so that it is not leaked
+                free(polys);
+                continue;
+            }
+
+            if (fit_len == 0) {
+                // the very first intersection: just copy the list handed to us
+                fit = resize_poly_list(fit, polys_len);
+                memcpy(fit, polys, polys_len * sizeof(correct_convolutional_polynomial_t));
+                fit_len = polys_len;
+            } else {
+                // both lists are sorted ascending, so walk them in step and
+                // compact the common values to the front of fit
+                size_t polys_iter = 0;
+                size_t fit_iter = 0;
+                size_t new_fit_len = 0;
+                while (polys_iter < polys_len && fit_iter < fit_len) {
+                    if (polys[polys_iter] < fit[fit_iter]) {
+                        polys_iter++;
+                    } else if (polys[polys_iter] > fit[fit_iter]) {
+                        fit_iter++;
+                    } else {
+                        fit[new_fit_len] = fit[fit_iter];
+                        polys_iter++;
+                        fit_iter++;
+                        new_fit_len++;
+                    }
+                }
+                if (new_fit_len != 0) {
+                    fit_len = new_fit_len;
+                } else {
+                    // no common value at all: restart the search for this coeff
+                    free(fit);
+                    fit = NULL;
+                    fit_len = 0;
+                }
+            }
+
+            free(polys);
+
+            if (fit_len == 1) {
+                poly[search_coeff] = fit[0];
+                // %o expects unsigned int
+                if (order <= 9) {
+                    printf(" %04o", (unsigned int)fit[0]);
+                } else {
+                    printf(" %06o", (unsigned int)fit[0]);
+                }
+                done = true;
+            }
+        }
+
+        free(fit);
+    }
+    printf("\n");
+}
+
+// Probe each libfec Viterbi decoder in turn (27, 29, 39, 615) and print the
+// polynomial found for it.
+int main(void) {
+    libfec_decoder_t libfec;
+    // large enough for the highest rate probed below (1/6)
+    correct_convolutional_polynomial_t poly[6];
+
+    srand(time(NULL));
+    setbuf(stdout, NULL);
+
+    // rate 1/2, order 7
+    libfec = (libfec_decoder_t){
+        .vit = create_viterbi27(8 * msg_len),
+        .update_len = 8 * msg_len + 6,
+        .init = init_viterbi27,
+        .update = update_viterbi27_blk,
+        .chainback = chainback_viterbi27,
+    };
+    find_poly(2, 7, libfec, poly);
+    delete_viterbi27(libfec.vit);
+
+    // rate 1/2, order 9
+    libfec = (libfec_decoder_t){
+        .vit = create_viterbi29(8 * msg_len),
+        .update_len = 8 * msg_len + 8,
+        .init = init_viterbi29,
+        .update = update_viterbi29_blk,
+        .chainback = chainback_viterbi29,
+    };
+    find_poly(2, 9, libfec, poly);
+    delete_viterbi29(libfec.vit);
+
+    // rate 1/3, order 9
+    libfec = (libfec_decoder_t){
+        .vit = create_viterbi39(8 * msg_len),
+        .update_len = 8 * msg_len + 8,
+        .init = init_viterbi39,
+        .update = update_viterbi39_blk,
+        .chainback = chainback_viterbi39,
+    };
+    find_poly(3, 9, libfec, poly);
+    delete_viterbi39(libfec.vit);
+
+    // rate 1/6, order 15
+    libfec = (libfec_decoder_t){
+        .vit = create_viterbi615(8 * msg_len),
+        .update_len = 8 * msg_len + 14,
+        .init = init_viterbi615,
+        .update = update_viterbi615_blk,
+        .chainback = chainback_viterbi615,
+    };
+    find_poly(6, 15, libfec, poly);
+    delete_viterbi615(libfec.vit);
+
+    return 0;
+}
--- a/core/libcorrect/tools/find_conv_optim_poly.c
+++ b/core/libcorrect/tools/find_conv_optim_poly.c
@@ -0,0 +1,330 @@
+#include <stdbool.h>
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <stddef.h>
+#include <limits.h>
+#include <pthread.h>
+
+// Select the convolutional codec implementation at compile time. The rest of
+// this file only goes through conv_t and the conv_* function pointers, so it
+// is the same for the SSE and the portable build.
+#if HAVE_SSE
+#include "correct/util/error-sim-sse.h"
+typedef correct_convolutional_sse conv_t;
+static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_sse_create;
+static void(*conv_destroy)(conv_t *) = correct_convolutional_sse_destroy;
+static size_t(*conv_enclen)(void *, size_t) = conv_correct_sse_enclen;
+static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_encode;
+static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_decode;
+#else
+#include "correct/util/error-sim.h"
+typedef correct_convolutional conv_t;
+static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_create;
+static void(*conv_destroy)(conv_t *) = correct_convolutional_destroy;
+static size_t(*conv_enclen)(void *, size_t) = conv_correct_enclen;
+static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_encode;
+static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_decode;
+#endif
+
+// A codec instance together with the polynomial it was created from.
+typedef struct {
+    conv_t *conv;
+    correct_convolutional_polynomial_t *poly;
+} conv_tester_t;
+
+// One candidate polynomial and the error statistics collected for it.
+typedef struct {
+    // accumulated test_conv_noise() results, one slot per testbench
+    int *distances;
+    // weighted sum of distances; candidates are sorted ascending by this
+    float cost;
+    // the candidate's coefficients, `rate` entries
+    correct_convolutional_polynomial_t *poly;
+} conv_result_t;
+
+// qsort() comparator ordering conv_result_t entries by ascending cost.
+// Returns a negative, zero, or positive value when a's cost is respectively
+// lower than, equal to, or higher than b's. Equal costs must compare as 0:
+// qsort() requires a consistent ordering, and answering "less than" in both
+// directions for a tie is not one.
+int compare_conv_results(const void *avoid, const void *bvoid) {
+    const conv_result_t *a = (const conv_result_t *)avoid;
+    const conv_result_t *b = (const conv_result_t *)bvoid;
+
+    return (a->cost > b->cost) - (a->cost < b->cost);
+}
+
+// Arguments handed to one search_exhaustive_thread() worker.
+typedef struct {
+    size_t rate;
+    size_t order;
+    // candidate list shared by all workers
+    conv_result_t *items;
+    size_t items_len;
+    // testbench used by this worker only
+    conv_testbench *scratch;
+    uint8_t *msg;
+    size_t msg_len;
+    // index into each candidate's distances[] that this worker updates
+    size_t test_offset;
+    double bpsk_voltage;
+} exhaustive_thread_args;
+
+// Thread entry point: run every candidate polynomial through this worker's
+// testbench and add the result of test_conv_noise() to the candidate's
+// distances[test_offset] slot.
+//   vargs: pointer to this worker's exhaustive_thread_args
+void *search_exhaustive_thread(void *vargs) {
+    exhaustive_thread_args *args = vargs;
+    conv_testbench *bench = args->scratch;
+    conv_result_t *item = args->items;
+    conv_result_t *items_end = args->items + args->items_len;
+
+    for (; item != items_end; item++) {
+        // a fresh codec per candidate serves as both encoder and decoder
+        conv_t *conv = conv_create(args->rate, args->order, item->poly);
+        bench->encode = conv_encode;
+        bench->encoder = conv;
+        bench->decode = conv_decode;
+        bench->decoder = conv;
+        item->distances[args->test_offset] += test_conv_noise(bench, args->msg, args->msg_len, args->bpsk_voltage);
+        conv_destroy(conv);
+    }
+    pthread_exit(NULL);
+}
+
+// Run one message block through every candidate polynomial, using one worker
+// thread per testbench. Worker i uses scratches[i] and accumulates into
+// distances[i] of every item. Returns after all workers have finished.
+//
+//   rate, order:   code parameters
+//   n_bytes, msg:  the message block and its length in bytes
+//   scratches:     num_scratches testbenches, one per worker
+//   weights:       unused here
+//   items:         items_len candidates to test
+//   bpsk_voltage:  forwarded to test_conv_noise()
+//
+// The process exits with an error message when memory or a thread cannot be
+// obtained, since the results would otherwise be silently incomplete.
+void search_exhaustive(size_t rate, size_t order,
+                       size_t n_bytes, uint8_t *msg,
+                       conv_testbench **scratches, size_t num_scratches,
+                       float *weights,
+                       conv_result_t *items,
+                       size_t items_len, double bpsk_voltage) {
+    (void)weights;
+
+    exhaustive_thread_args *args = malloc(num_scratches * sizeof(exhaustive_thread_args));
+    pthread_t *threads = malloc(num_scratches * sizeof(pthread_t));
+    if (num_scratches != 0 && (!args || !threads)) {
+        fprintf(stderr, "search_exhaustive: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // one attribute object serves every thread; it is destroyed below
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+    size_t num_started = 0;
+    int create_err = 0;
+    for (size_t i = 0; i < num_scratches; i++) {
+        args[i].rate = rate;
+        args[i].order = order;
+        args[i].items = items;
+        args[i].items_len = items_len;
+        args[i].scratch = scratches[i];
+        args[i].msg = msg;
+        args[i].msg_len = n_bytes;
+        args[i].test_offset = i;
+        args[i].bpsk_voltage = bpsk_voltage;
+        create_err = pthread_create(&threads[i], &attr, search_exhaustive_thread, &args[i]);
+        if (create_err != 0) {
+            break;
+        }
+        num_started++;
+    }
+    pthread_attr_destroy(&attr);
+
+    // only join threads that really exist
+    for (size_t i = 0; i < num_started; i++) {
+        pthread_join(threads[i], NULL);
+    }
+
+    free(args);
+    free(threads);
+
+    if (create_err != 0) {
+        fprintf(stderr, "search_exhaustive: pthread_create failed: %s\n", strerror(create_err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Reset the first num_scratches distance counters of every item to zero so
+// that a new run starts from a clean slate.
+void search_exhaustive_init(conv_result_t *items, size_t items_len,
+                            size_t num_scratches) {
+    for (size_t item = 0; item < items_len; item++) {
+        int *distances = items[item].distances;
+        for (size_t slot = 0; slot < num_scratches; slot++) {
+            distances[slot] = 0;
+        }
+    }
+}
+
+// Turn the accumulated distances of every item into a single cost, the sum of
+// weights[j] * distances[j] over all weights, then sort the items by that
+// cost using compare_conv_results.
+void search_exhaustive_fin(conv_result_t *items, size_t items_len,
+                           float *weights, size_t weights_len) {
+    for (size_t idx = 0; idx < items_len; idx++) {
+        conv_result_t *entry = &items[idx];
+        entry->cost = 0;
+        for (size_t w = 0; w < weights_len; w++) {
+            entry->cost += weights[w] * entry->distances[w];
+        }
+    }
+
+    qsort(items, items_len, sizeof(conv_result_t), compare_conv_results);
+}
+
+// largest number of message bytes pushed through the testbenches at once
+const size_t max_block_len = 16384;
+// message length (in bytes) per run is never grown beyond this
+const size_t max_msg_len = 50000000;
+
+// Exhaustively rank every polynomial of the given rate and order and print
+// the survivors.
+//
+// All candidates whose coefficients have both the highest and the lowest bit
+// set are enumerated. While more than 20 remain, every candidate is run over
+// n_bytes of random data in blocks of at most max_block_len bytes, with fresh
+// noise per block, sorted by cost, and the worse half is dropped (never going
+// below 20). The message length doubles per run up to max_msg_len; once that
+// limit has been reached the best 20 are selected directly.
+//
+//   rate, order:     code parameters
+//   start:           start.conv is used to size the testbenches
+//   scratches:       num_scratches testbenches, resized in place
+//   weights:         per-testbench cost weights
+//   n_bytes:         message length of the first run
+//   eb_n0:           per-testbench Eb/N0 passed to build_white_noise()
+//   bpsk_bit_energy: passed to build_white_noise()
+//   n_iter:          unused here
+//   bpsk_voltage:    passed on to the search
+void test(size_t rate, size_t order,
+          conv_tester_t start, conv_testbench **scratches,
+          size_t num_scratches, float *weights,
+          size_t n_bytes, double *eb_n0,
+          double bpsk_bit_energy, size_t n_iter,
+          double bpsk_voltage) {
+    (void)n_iter;
+
+    uint8_t *msg = malloc(max_block_len * sizeof(uint8_t));
+
+    correct_convolutional_polynomial_t maxcoeff = (1 << order) - 1;
+    correct_convolutional_polynomial_t startcoeff = (1 << (order - 1)) + 1;
+    size_t num_polys = (maxcoeff - startcoeff) / 2 + 1;
+    // total number of candidates is num_polys ^ rate
+    size_t convs_len = 1;
+    for (size_t i = 0; i < rate; i++) {
+        convs_len *= num_polys;
+    }
+
+    conv_result_t *exhaustive = malloc(convs_len * sizeof(conv_result_t));
+    correct_convolutional_polynomial_t *iter_poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
+
+    for (size_t i = 0; i < rate; i++) {
+        iter_poly[i] = startcoeff;
+    }
+
+    // init exhaustive with all polys
+    for (size_t i = 0; i < convs_len; i++) {
+        exhaustive[i].poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
+        exhaustive[i].distances = calloc(num_scratches, sizeof(int));
+        exhaustive[i].cost = 0;
+        memcpy(exhaustive[i].poly, iter_poly, rate * sizeof(correct_convolutional_polynomial_t));
+        // this next loop adds 2 with "carry"
+        for (size_t j = 0; j < rate; j++) {
+            if (iter_poly[j] < maxcoeff) {
+                iter_poly[j] += 2;
+                // no more carries to propagate
+                break;
+            } else {
+                iter_poly[j] = startcoeff;
+            }
+        }
+    }
+    free(iter_poly);
+
+    // message length of the most recent run. the distances printed at the end
+    // were collected over this many bytes, whereas n_bytes may already have
+    // been grown for a run that never takes place
+    size_t last_run_bytes = n_bytes;
+
+    while (convs_len > 20) {
+        size_t bytes_remaining = n_bytes;
+        last_run_bytes = n_bytes;
+
+        // call init(), which sets all the error metrics to 0 for our new run
+        search_exhaustive_init(exhaustive, convs_len, num_scratches);
+
+        while (bytes_remaining) {
+            // in order to keep memory usage constant, we separate the msg into
+            // blocks and send each one through
+            // each time we do this, we have to calculate a new noise for each
+            // testbench
+
+            size_t block_len = (max_block_len < bytes_remaining) ? max_block_len : bytes_remaining;
+            bytes_remaining -= block_len;
+
+            for (size_t j = 0; j < block_len; j++) {
+                msg[j] = rand() % 256;
+            }
+
+            for (size_t i = 0; i < num_scratches; i++) {
+                scratches[i] = resize_conv_testbench(scratches[i], conv_enclen, start.conv, block_len);
+                build_white_noise(scratches[i]->noise, scratches[i]->enclen, eb_n0[i], bpsk_bit_energy);
+            }
+
+            search_exhaustive(rate, order,
+                              block_len, msg, scratches, num_scratches, weights,
+                              exhaustive, convs_len, bpsk_voltage);
+        }
+
+        // call fin(), which calculates a cost metric for all of the distances
+        // added by our msg block iterations and then sorts by this metric
+        search_exhaustive_fin(exhaustive, convs_len, weights, num_scratches);
+
+        // decide parameters for next loop iter
+        // if we've reduced to 20 or fewer items, we're going to just select
+        // those and declare the test done
+        size_t new_convs_len = (convs_len / 2) < 20 ? 20 : convs_len / 2;
+
+        // normally we'll double the message length each time we halve
+        // the number of entries so that each iter takes roughly the
+        // same time but has twice the resolution of the previous run.
+        //
+        // however, if we've reached max_msg_len, then we assume that
+        // the error stats collected are likely converged to whatever
+        // final value they'll take, and adding more length will not
+        // help us get better metrics. if we're at that point, then
+        // we just select the top 20 items and declare them winners
+        if (n_bytes >= max_msg_len) {
+            // converged case
+            new_convs_len = 20;
+        } else {
+            // increase our error metric resolution next run
+            n_bytes *= 2;
+            n_bytes = (n_bytes < max_msg_len) ? n_bytes : max_msg_len;
+        }
+        for (size_t i = new_convs_len; i < convs_len; i++) {
+            // these entries lost, free their memory here
+            free(exhaustive[i].poly);
+            free(exhaustive[i].distances);
+        }
+        convs_len = new_convs_len;
+        printf("exhaustive run: %zu items remain\n", convs_len);
+    }
+
+    for (size_t i = 0; i < convs_len; i++) {
+        for (size_t j = 0; j < rate; j++) {
+            // %o expects unsigned int
+            printf(" %06o", (unsigned int)exhaustive[i].poly[j]);
+        }
+        printf(":");
+        for (size_t j = 0; j < num_scratches; j++) {
+            // errors per message bit of the run that produced these distances
+            printf(" %.2e@%.1fdB", exhaustive[i].distances[j]/((float)last_run_bytes * 8), eb_n0[j]);
+        }
+        printf("\n");
+    }
+
+    for (size_t i = 0; i < convs_len; i++) {
+        free(exhaustive[i].poly);
+        free(exhaustive[i].distances);
+    }
+    free(exhaustive);
+    free(msg);
+}
+
+// Usage: conv_find_optim_poly <rate> <order> <n_bytes> <n_iter>
+//
+// Parses the four numeric arguments, sets up four testbenches whose Eb/N0
+// values depend on the order, and runs the exhaustive search in test().
+// Returns 1 when the arguments are missing or invalid, 0 otherwise.
+int main(int argc, char **argv) {
+    srand(time(NULL));
+
+    size_t rate, order, n_bytes, n_iter;
+
+    if (argc < 5 ||
+        sscanf(argv[1], "%zu", &rate) != 1 ||
+        sscanf(argv[2], "%zu", &order) != 1 ||
+        sscanf(argv[3], "%zu", &n_bytes) != 1 ||
+        sscanf(argv[4], "%zu", &n_iter) != 1) {
+        fprintf(stderr, "usage: %s <rate> <order> <n_bytes> <n_iter>\n",
+                (argc > 0) ? argv[0] : "conv_find_optim_poly");
+        return 1;
+    }
+
+    // coefficients are 16 bits wide, and the shifts below need order >= 1;
+    // order 1 would make the lowest and the highest bit the same bit
+    if (rate == 0 || order < 2 || order > 16) {
+        fprintf(stderr, "rate must be at least 1 and order must be in 2..16\n");
+        return 1;
+    }
+
+    double bpsk_voltage = 1.0/sqrt(2.0);
+    double bpsk_sym_energy = 2*pow(bpsk_voltage, 2.0);
+    // rate bits transmitted for every input bit
+    double bpsk_bit_energy = bpsk_sym_energy * rate;
+
+    correct_convolutional_polynomial_t maxcoeff = (1 << order) - 1;
+    correct_convolutional_polynomial_t startcoeff = (1 << (order - 1)) + 1;
+
+    conv_tester_t start;
+
+    start.poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
+
+    for (size_t i = 0; i < rate; i++) {
+        start.poly[i] = ((maxcoeff - startcoeff) / 2) + startcoeff + 1;
+    }
+
+    start.conv = conv_create(rate, order, start.poly);
+
+    // these arrays live until main returns. compound literals created inside
+    // the switch body would end their lifetime at the end of the switch and
+    // leave dangling pointers behind
+    double eb_n0[4];
+    float weights[4] = {8000, 400, 20, 1};
+    size_t num_scratches = sizeof(eb_n0) / sizeof(eb_n0[0]);
+
+    // highest Eb/N0 tested for this order; the others follow in 0.5 dB steps
+    double eb_n0_top;
+    switch (order) {
+        case 6:
+            eb_n0_top = 6.0;
+            break;
+        case 7:
+            eb_n0_top = 5.5;
+            break;
+        case 8:
+        case 9:
+            eb_n0_top = 5.0;
+            break;
+        default:
+            eb_n0_top = 4.5;
+    }
+    for (size_t i = 0; i < num_scratches; i++) {
+        eb_n0[i] = eb_n0_top - 0.5 * (double)i;
+    }
+
+    conv_testbench **scratches = malloc(num_scratches * sizeof(conv_testbench *));
+    for (size_t i = 0; i < num_scratches; i++) {
+        scratches[i] = resize_conv_testbench(NULL, conv_enclen, start.conv, max_block_len);
+    }
+
+    test(rate, order, start, scratches, num_scratches, weights, n_bytes, eb_n0, bpsk_bit_energy, n_iter, bpsk_voltage);
+
+    free(start.poly);
+    conv_destroy(start.conv);
+    for (size_t i = 0; i < num_scratches; i++) {
+        free_scratch(scratches[i]);
+    }
+    free(scratches);
+
+    return 0;
+}
--- a/core/libcorrect/tools/find_conv_optim_poly_annealing.c
+++ b/core/libcorrect/tools/find_conv_optim_poly_annealing.c
@@ -0,0 +1,350 @@
+#include <stdbool.h>
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <stddef.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+
+// Select the convolutional codec implementation at compile time. The rest of
+// this file only goes through conv_t and the conv_* function pointers, so it
+// is the same for the SSE and the portable build.
+#if HAVE_SSE
+#include "correct/util/error-sim-sse.h"
+typedef correct_convolutional_sse conv_t;
+static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_sse_create;
+static void(*conv_destroy)(conv_t *) = correct_convolutional_sse_destroy;
+static size_t(*conv_enclen)(void *, size_t) = conv_correct_sse_enclen;
+static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_encode;
+static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_decode;
+#else
+#include "correct/util/error-sim.h"
+typedef correct_convolutional conv_t;
+static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_create;
+static void(*conv_destroy)(conv_t *) = correct_convolutional_destroy;
+static size_t(*conv_enclen)(void *, size_t) = conv_correct_enclen;
+static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_encode;
+static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_decode;
+#endif
+
+// A codec instance together with the polynomial it was created from.
+typedef struct {
+    conv_t *conv;
+    correct_convolutional_polynomial_t *poly;
+} conv_tester_t;
+
+// Shuffle the len entries of a in place with a Fisher-Yates pass driven by
+// rand(). Arrays with fewer than two entries are left untouched.
+void shuffle(int *a, size_t len) {
+    // `i + 1 < len` cannot wrap around for len 0 or 1 the way `len - 2`
+    // does, and it visits slot len - 2 so the last two entries can trade
+    // places too
+    for (size_t i = 0; i + 1 < len; i++) {
+        // pick the occupant of slot i from the not yet placed tail [i, len)
+        size_t j = i + (size_t)rand() % (len - i);
+        int temp = a[i];
+        a[i] = a[j];
+        a[j] = temp;
+    }
+}
+
+// Draw a count starting at 1: keep incrementing while a fresh uniform sample
+// from rand() exceeds p, but never go past max. Returns 1 when max <= 1.
+int rand_geo(float p, int max) {
+    int geo;
+    for (geo = 1; geo < max; geo++) {
+        if (!(rand() / (float)RAND_MAX > p)) {
+            break;
+        }
+    }
+    return geo;
+}
+
+// Produce a random neighbor of the polynomial `start` in `neighbor`.
+//
+// The neighbor is a copy of start with a random number of interior bits
+// flipped; the count comes from rand_geo(0.4, ...) and is at least one. The
+// lowest and highest bit of every coefficient are never touched, to prevent
+// creating a degenerate poly. When there are no interior bits at all (order
+// of 2 or less, or rate 0) the neighbor is a plain copy of start.
+//
+//   start, neighbor: arrays of `rate` coefficients
+//   rate, order:     code parameters
+void next_neighbor(correct_convolutional_polynomial_t *start,
+                   correct_convolutional_polynomial_t *neighbor, size_t rate, size_t order) {
+    memcpy(neighbor, start, rate * sizeof(correct_convolutional_polynomial_t));
+
+    if (rate == 0 || order <= 2) {
+        // nothing can be flipped, and sizing the array below from
+        // rate * (order - 2) would give a zero or wrapped-around length
+        return;
+    }
+
+    size_t interior_bits = order - 2;
+    size_t n_coeffs = rate * interior_bits;
+
+    // every flippable bit gets an id; shuffling the ids makes the first
+    // nflips of them a random selection without repeats
+    int coeffs[n_coeffs];
+    for (size_t i = 0; i < n_coeffs; i++) {
+        coeffs[i] = (int)i;
+    }
+    shuffle(coeffs, n_coeffs);
+
+    size_t nflips = (size_t)rand_geo(0.4, (int)n_coeffs);
+    for (size_t i = 0; i < nflips; i++) {
+        size_t id = (size_t)coeffs[i];
+        // id / interior_bits selects the coefficient; id % interior_bits + 1
+        // selects the bit, skipping bit 0
+        neighbor[id / interior_bits] ^= 1 << (id % interior_bits + 1);
+    }
+}
+
+// Decide whether to move from a state with cost_a to one with cost_b.
+// A strictly cheaper state is always accepted. Otherwise the move is accepted
+// when exp((cost_a - cost_b) / (cost_a * temperature)) exceeds a uniform
+// sample drawn from rand().
+bool accept(float cost_a, float cost_b, double temperature) {
+    if (!(cost_b < cost_a)) {
+        float threshold = (float)(rand()) / (float)(RAND_MAX);
+        double acceptance = exp((cost_a - cost_b) / (cost_a * temperature));
+        return acceptance > threshold;
+    }
+    return true;
+}
+
+// Arguments and result slot for one find_cost_thread() worker.
+typedef struct {
+    size_t rate;
+    size_t order;
+    correct_convolutional_polynomial_t *poly;
+    // output: sum of the test_conv_noise() results over the whole message
+    unsigned int distance;
+    conv_testbench *scratch;
+    // total number of message bytes to test
+    size_t msg_len;
+    double eb_n0;
+    double bpsk_voltage;
+    double bpsk_bit_energy;
+} thread_args;
+
+// largest number of message bytes pushed through a testbench at once
+const size_t max_block_len = 16384;
+
+// Thread entry point: test one polynomial at one Eb/N0 over args->msg_len
+// random bytes and leave the summed test_conv_noise() results in
+// args->distance.
+//   vargs: pointer to this worker's thread_args
+void *find_cost_thread(void *vargs) {
+    thread_args *args = vargs;
+    conv_testbench *bench = args->scratch;
+    uint8_t *block = malloc(max_block_len);
+    conv_t *codec = conv_create(args->rate, args->order, args->poly);
+
+    args->distance = 0;
+
+    // the message is processed in blocks of at most max_block_len bytes to
+    // keep memory usage constant; each block gets its own noise
+    size_t remaining = args->msg_len;
+    while (remaining != 0) {
+        size_t block_len = remaining;
+        if (max_block_len < remaining) {
+            block_len = max_block_len;
+        }
+        remaining -= block_len;
+
+        for (unsigned int k = 0; k < block_len; k++) {
+            block[k] = rand() % 256;
+        }
+
+        bench = resize_conv_testbench(bench, conv_enclen, codec, block_len);
+        // the same codec instance encodes and decodes
+        bench->encoder = codec;
+        bench->decoder = codec;
+        bench->encode = conv_encode;
+        bench->decode = conv_decode;
+
+        build_white_noise(bench->noise, bench->enclen, args->eb_n0, args->bpsk_bit_energy);
+
+        args->distance += test_conv_noise(bench, block, block_len, args->bpsk_voltage);
+    }
+
+    conv_destroy(codec);
+    free(block);
+    pthread_exit(NULL);
+}
+
+// Measure the cost of one polynomial.
+//
+// One worker thread per testbench tests the polynomial over msg_len random
+// bytes at that testbench's Eb/N0. The polynomial and the per-testbench error
+// rates are printed, and the weighted sum of the distances is returned.
+//
+//   rate, order:     code parameters
+//   poly:            `rate` coefficients to test
+//   msg_len:         number of message bytes per testbench
+//   scratches:       num_scratches testbenches, one per worker
+//   weights, eb_n0:  per-testbench cost weight and Eb/N0
+//   bpsk_voltage, bpsk_bit_energy: forwarded to the workers
+//
+// The process exits with an error message when memory or a thread cannot be
+// obtained, since the cost would otherwise be computed from garbage.
+float find_cost(size_t rate, size_t order, correct_convolutional_polynomial_t *poly, size_t msg_len,
+                conv_testbench **scratches, size_t num_scratches, float *weights, double *eb_n0,
+                double bpsk_voltage, double bpsk_bit_energy) {
+    thread_args *args = malloc(num_scratches * sizeof(thread_args));
+    pthread_t *threads = malloc(num_scratches * sizeof(pthread_t));
+    if (num_scratches != 0 && (!args || !threads)) {
+        fprintf(stderr, "find_cost: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // one attribute object serves every thread; it is destroyed below
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+    size_t num_started = 0;
+    int create_err = 0;
+    for (size_t i = 0; i < num_scratches; i++) {
+        args[i].rate = rate;
+        args[i].order = order;
+        args[i].poly = poly;
+        args[i].distance = 0;
+        args[i].scratch = scratches[i];
+        args[i].msg_len = msg_len;
+        args[i].eb_n0 = eb_n0[i];
+        args[i].bpsk_voltage = bpsk_voltage;
+        args[i].bpsk_bit_energy = bpsk_bit_energy;
+        create_err = pthread_create(&threads[i], &attr, find_cost_thread, &args[i]);
+        if (create_err != 0) {
+            break;
+        }
+        num_started++;
+    }
+    pthread_attr_destroy(&attr);
+
+    // only join threads that really exist
+    for (size_t i = 0; i < num_started; i++) {
+        pthread_join(threads[i], NULL);
+    }
+
+    if (create_err != 0) {
+        fprintf(stderr, "find_cost: pthread_create failed: %s\n", strerror(create_err));
+        free(args);
+        free(threads);
+        exit(EXIT_FAILURE);
+    }
+
+    float cost = 0;
+    printf("poly:");
+    for (size_t i = 0; i < rate; i++) {
+        // %o expects unsigned int
+        printf(" %06o", (unsigned int)poly[i]);
+    }
+    printf(" error:");
+    for (size_t i = 0; i < num_scratches; i++) {
+        cost += weights[i] * args[i].distance;
+        printf(" %.2e@%.1fdB", (args[i].distance / (float)(msg_len * 8)), eb_n0[i]);
+    }
+    printf("\n");
+
+    free(args);
+    free(threads);
+
+    return cost;
+}
+
+// Set by sig_handler to ask the annealing loop to stop after the current
+// step. volatile sig_atomic_t is the object type that may safely be written
+// from a signal handler and read from the interrupted code.
+static volatile sig_atomic_t terminated = 0;
+
+// Signal handler for SIGINT, SIGTERM and SIGHUP: request termination once and
+// announce it. Other signals are ignored.
+void sig_handler(int sig) {
+    if (sig != SIGINT && sig != SIGTERM && sig != SIGHUP) {
+        return;
+    }
+    if (terminated) {
+        return;
+    }
+    terminated = 1;
+
+    // printf() is not async-signal-safe; write() is
+    static const char notice[] = "terminating after current poly\n";
+    ssize_t written = write(STDOUT_FILENO, notice, sizeof(notice) - 1);
+    (void)written;
+}
+
+// Perform simulated annealing to find the optimal polynomial.
+//
+// Starting from start->poly, each of the up to n_steps steps costs a random
+// neighbor of the current state with find_cost() and moves there when
+// accept() agrees. The temperature is multiplied by cooling_factor after
+// every step. The loop ends early once `terminated` has been set. The last
+// and the best state are printed, and the best one is written back to
+// start->poly.
+//
+//   rate, order:       code parameters
+//   n_steps:           maximum number of annealing steps
+//   start:             in: initial polynomial, out: best polynomial found
+//   n_bytes:           message length used for every cost evaluation
+//   scratches, num_scratches, weights, eb_n0, bpsk_voltage, bpsk_bit_energy:
+//                      forwarded to find_cost()
+//   start_temperature: initial temperature
+//   cooling_factor:    per-step temperature multiplier
+void search_simulated_annealing(size_t rate, size_t order, size_t n_steps, conv_tester_t *start,
+                                size_t n_bytes, conv_testbench **scratches, size_t num_scratches,
+                                float *weights, double start_temperature, double cooling_factor,
+                                double *eb_n0, double bpsk_voltage, double bpsk_bit_energy) {
+    const size_t poly_bytes = rate * sizeof(correct_convolutional_polynomial_t);
+
+    float cost = find_cost(rate, order, start->poly, n_bytes, scratches, num_scratches, weights,
+                           eb_n0, bpsk_voltage, bpsk_bit_energy);
+
+    correct_convolutional_polynomial_t *neighbor_poly = malloc(poly_bytes);
+    correct_convolutional_polynomial_t *state = malloc(poly_bytes);
+    correct_convolutional_polynomial_t *best = malloc(poly_bytes);
+
+    float best_cost = cost;
+
+    memcpy(state, start->poly, poly_bytes);
+    memcpy(best, start->poly, poly_bytes);
+
+    double temperature = start_temperature;
+
+    for (size_t i = 0; i < n_steps; i++) {
+        next_neighbor(state, neighbor_poly, rate, order);
+        float neighbor_cost =
+            find_cost(rate, order, neighbor_poly, n_bytes, scratches, num_scratches, weights, eb_n0,
+                      bpsk_voltage, bpsk_bit_energy);
+        if (accept(cost, neighbor_cost, temperature)) {
+            // we're moving to our neighbor's house
+            memcpy(state, neighbor_poly, poly_bytes);
+            cost = neighbor_cost;
+        }
+
+        if (cost < best_cost) {
+            best_cost = cost;
+            memcpy(best, state, poly_bytes);
+        }
+
+        temperature *= cooling_factor;
+
+        if (terminated) {
+            break;
+        }
+    }
+
+    printf("last state:");
+    for (size_t i = 0; i < rate; i++) {
+        // %o expects unsigned int
+        printf(" %06o", (unsigned int)state[i]);
+    }
+    printf("\n");
+
+    printf("best state:");
+    for (size_t i = 0; i < rate; i++) {
+        printf(" %06o", (unsigned int)best[i]);
+    }
+    // end the line so that later output does not run on after it
+    printf("\n");
+
+    memcpy(start->poly, best, poly_bytes);
+
+    free(state);
+    free(best);
+    free(neighbor_poly);
+}
+
+// Drives n_iter consecutive annealing passes over `start`.
+//
+// The first pass uses start temperature 0.5, cooling factor 0.985 and 500
+// steps; every later pass uses start temperature 250, cooling factor 0.95 and
+// 100 steps. All remaining arguments are handed to search_simulated_annealing
+// unchanged. `start` is received by value, but its poly buffer is shared with
+// the caller, so each pass continues from whatever the previous one wrote
+// there.
+void test_sa(size_t rate, size_t order, conv_tester_t start, conv_testbench **scratches,
+             size_t num_scratches, float *weights, size_t n_bytes, double *eb_n0,
+             double bpsk_bit_energy, size_t n_iter, double bpsk_voltage) {
+    for (size_t pass = 0; pass < n_iter; pass++) {
+        double temperature;
+        double cooling_factor;
+        size_t n_steps;
+
+        if (pass == 0) {
+            temperature = 0.5;
+            cooling_factor = 0.985;
+            n_steps = 500;
+        } else {
+            temperature = 250;
+            cooling_factor = 0.95;
+            n_steps = 100;
+        }
+
+        search_simulated_annealing(rate, order, n_steps, &start, n_bytes, scratches, num_scratches,
+                                   weights, temperature, cooling_factor, eb_n0, bpsk_voltage,
+                                   bpsk_bit_energy);
+    }
+}
+
+// Entry point of the annealing search tool.
+//
+// Usage: <program> rate order n_bytes n_iter
+//   rate     number of polynomials in the code (bits sent per input bit)
+//   order    bit length of each polynomial
+//   n_bytes  forwarded to the cost evaluation
+//   n_iter   number of annealing passes to run
+//
+// Builds a random starting polynomial set, picks the Eb/N0 levels to score at
+// from `order`, runs test_sa, and releases everything it allocated.
+// Returns 0 on success and 1 on bad arguments or allocation failure.
+int main(int argc, char **argv) {
+    size_t rate, order, n_bytes, n_iter;
+
+    // All four arguments are required and must parse as unsigned integers.
+    if (argc < 5 || sscanf(argv[1], "%zu", &rate) != 1 || sscanf(argv[2], "%zu", &order) != 1 ||
+        sscanf(argv[3], "%zu", &n_bytes) != 1 || sscanf(argv[4], "%zu", &n_iter) != 1) {
+        fprintf(stderr, "usage: %s rate order n_bytes n_iter\n",
+                (argc > 0 && argv[0]) ? argv[0] : "conv_find_optim_poly_annealing");
+        return 1;
+    }
+
+    // The start polynomial sets bit (order - 1) and randomizes the bits below
+    // it, so order must be at least 2 and must fit the polynomial type. The
+    // shifts below are evaluated as int, hence the additional cap at 31.
+    size_t max_order = 8 * sizeof(correct_convolutional_polynomial_t);
+    if (max_order > 31) {
+        max_order = 31;
+    }
+    if (rate == 0 || order < 2 || order > max_order) {
+        fprintf(stderr, "rate must be nonzero and order must be between 2 and %zu\n", max_order);
+        return 1;
+    }
+
+    srand(time(NULL));
+
+    signal(SIGINT, sig_handler);
+    signal(SIGTERM, sig_handler);
+    signal(SIGHUP, sig_handler);
+
+    double bpsk_voltage = 1.0 / sqrt(2.0);
+    double bpsk_sym_energy = 2 * pow(bpsk_voltage, 2.0);
+    // rate bits transmitted for every input bit
+    double bpsk_bit_energy = bpsk_sym_energy * rate;
+
+    // Eb/N0 levels (dB) to score each candidate at, selected by order below,
+    // and the weight of each level in the total cost. These are static
+    // arrays on purpose: a compound literal created inside the switch body
+    // stops existing at the switch's closing brace, which would leave the
+    // pointers dangling by the time test_sa reads them.
+    static double eb_n0_order_6[] = {6.0, 5.5, 5.0, 4.5};
+    static double eb_n0_order_7_8[] = {5.5, 5.0, 4.5, 4.0};
+    static double eb_n0_order_9_10[] = {5.0, 4.5, 4.0, 3.5};
+    static double eb_n0_order_11_13[] = {4.5, 4.0, 3.5, 3.0};
+    static double eb_n0_other[] = {3.5, 3.0, 2.5, 2.0};
+    static float weights[] = {8000, 400, 20, 1};
+
+    // One scratch testbench per noise level (4).
+    size_t num_scratches = sizeof(weights) / sizeof(weights[0]);
+
+    double *eb_n0;
+    switch (order) {
+        case 6:
+            eb_n0 = eb_n0_order_6;
+            break;
+        case 7:
+        case 8:
+            eb_n0 = eb_n0_order_7_8;
+            break;
+        case 9:
+        case 10:
+            eb_n0 = eb_n0_order_9_10;
+            break;
+        case 11:
+        case 12:
+        case 13:
+            eb_n0 = eb_n0_order_11_13;
+            break;
+        default:
+            eb_n0 = eb_n0_other;
+            break;
+    }
+
+    correct_convolutional_polynomial_t startcoeff = (1 << (order - 1)) + 1;
+
+    conv_tester_t start;
+    start.poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
+    conv_testbench **scratches = malloc(num_scratches * sizeof(conv_testbench *));
+    if (!start.poly || !scratches) {
+        fprintf(stderr, "out of memory\n");
+        free(start.poly);
+        free(scratches);
+        return 1;
+    }
+
+    // Random polynomial with the highest and lowest bits always set.
+    for (size_t i = 0; i < rate; i++) {
+        start.poly[i] = ((rand() % (1 << (order - 2))) << 1) + startcoeff;
+    }
+
+    start.conv = conv_create(rate, order, start.poly);
+    // NOTE(review): conv_create is defined outside this view; a NULL result is
+    // treated as failure here -- confirm that is how it reports errors.
+    if (!start.conv) {
+        fprintf(stderr, "could not create convolutional coder for rate %zu, order %zu\n", rate,
+                order);
+        free(start.poly);
+        free(scratches);
+        return 1;
+    }
+
+    for (size_t i = 0; i < num_scratches; i++) {
+        scratches[i] = resize_conv_testbench(NULL, conv_enclen, start.conv, max_block_len);
+    }
+
+    test_sa(rate, order, start, scratches, num_scratches, weights, n_bytes, eb_n0, bpsk_bit_energy,
+            n_iter, bpsk_voltage);
+
+    free(start.poly);
+    conv_destroy(start.conv);
+    for (size_t i = 0; i < num_scratches; i++) {
+        free_scratch(scratches[i]);
+    }
+    free(scratches);
+
+    return 0;
+}
--- a/core/libcorrect/tools/find_rs_primitive_poly.c
+++ b/core/libcorrect/tools/find_rs_primitive_poly.c
@@ -0,0 +1,51 @@
+#include "correct/reed-solomon.h"
+
+size_t block_size = 255;  // largest field element value (2^power_max - 1); the log table holds block_size + 1 entries
+int power_max = 8;  // exponent printed for the leading term of each polynomial
+
+// Checks whether repeated doubling modulo `poly` visits block_size distinct
+// elements, i.e. whether `poly` can serve as a primitive polynomial.
+//
+// Starting from 1, the element is doubled block_size times, XOR-ing in `poly`
+// whenever the result exceeds block_size. `log` must have room for
+// block_size + 1 entries; it is overwritten with the step at which each
+// element was reached. Returns false as soon as an element that already holds
+// a nonzero step is reached again, true if every step completes.
+bool trypoly(field_operation_t poly, field_logarithm_t *log) {
+    // Size the clear in bytes, not entries, so the whole table is zeroed
+    // whatever the width of field_logarithm_t.
+    memset(log, 0, (block_size + 1) * sizeof(*log));
+
+    field_operation_t element = 1;
+    for (field_operation_t i = 1; i < block_size + 1; i++) {
+        element = element * 2;
+        if (element > block_size) {
+            element ^= poly;
+        }
+        if (log[element] != 0) {
+            // Already visited: the sequence cycles before covering the field.
+            return false;
+        }
+        log[element] = (field_logarithm_t)i;
+    }
+    return true;
+}
+
+// Tries every candidate polynomial whose top bit (block_size + 1) is set and
+// prints each one trypoly accepts: the hex value followed by its terms, e.g.
+// "0x11d valid: x^8 + x^4 + x^3 + x^2 + 1".
+// Returns 0 on success, 1 if the log table cannot be allocated.
+int main(void) {
+    const size_t top_bit = block_size + 1;
+
+    field_logarithm_t *log = malloc(top_bit * sizeof(field_logarithm_t));
+    if (!log) {
+        fprintf(stderr, "out of memory\n");
+        return 1;
+    }
+
+    for (field_operation_t candidate = top_bit; candidate < top_bit << 1; candidate++) {
+        if (!trypoly(candidate, log)) {
+            continue;
+        }
+
+        printf("0x%x valid: ", candidate);
+
+        // Walk the bits from the highest power down by shifting each one into
+        // the top_bit position in turn.
+        field_operation_t remaining = candidate;
+        for (int power = power_max; remaining; power--) {
+            if (remaining & top_bit) {
+                if (power > 1) {
+                    printf("x^%d", power);
+                } else if (power) {
+                    printf("x");
+                } else {
+                    printf("1");
+                }
+                // A separator is needed only when lower-order terms follow.
+                if (remaining & block_size) {
+                    printf(" + ");
+                }
+            }
+            remaining <<= 1;
+            remaining &= (block_size << 1) + 1;
+        }
+        printf("\n");
+    }
+
+    free(log);
+    return 0;
+}