mirror of
https://github.com/AlexandreRouma/SDRPlusPlus.git
synced 2025-06-28 05:17:50 +02:00
Bugfix + added M17 decoder to the linux CI
This commit is contained in:
2
core/libcorrect/src/CMakeLists.txt
Normal file
2
core/libcorrect/src/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
# Descend into each codec implementation directory.
add_subdirectory(convolutional)
add_subdirectory(reed-solomon)
|
5
core/libcorrect/src/convolutional/CMakeLists.txt
Normal file
5
core/libcorrect/src/convolutional/CMakeLists.txt
Normal file
@ -0,0 +1,5 @@
|
||||
# Sources that make up the convolutional codec object library.
set(SRCFILES
    bit.c
    metric.c
    history_buffer.c
    error_buffer.c
    lookup.c
    convolutional.c
    encode.c
    decode.c)

add_library(correct-convolutional OBJECT ${SRCFILES})

# The sse subdirectory is only added when HAVE_SSE is set.
if(HAVE_SSE)
    add_subdirectory(sse)
endif()
|
232
core/libcorrect/src/convolutional/bit.c
Normal file
232
core/libcorrect/src/convolutional/bit.c
Normal file
@ -0,0 +1,232 @@
|
||||
#include "correct/convolutional/bit.h"
|
||||
|
||||
// Allocates a zero-initialised bit writer.
//
// If `bytes` is non-NULL the writer is immediately pointed at that buffer
// (of `len` bytes) via bit_writer_reconfigure(); otherwise it must be
// reconfigured before the first write.
//
// Returns NULL if the allocation fails. Release with bit_writer_destroy().
bit_writer_t *bit_writer_create(uint8_t *bytes, size_t len) {
    bit_writer_t *w = calloc(1, sizeof *w);
    if (!w) {
        // previously a failed allocation was dereferenced in reconfigure
        return NULL;
    }

    if (bytes) {
        bit_writer_reconfigure(w, bytes, len);
    }

    return w;
}
|
||||
|
||||
// Points the writer at a new output buffer and discards any partially
// assembled byte, so the next bit written lands in bytes[0].
void bit_writer_reconfigure(bit_writer_t *w, uint8_t *bytes, size_t len) {
    // rewind the cursor
    w->byte_index = 0;
    w->current_byte_len = 0;
    w->current_byte = 0;

    // attach the new buffer
    w->len = len;
    w->bytes = bytes;
}
|
||||
|
||||
// Releases the writer object itself; the output buffer it points at is
// not freed here.
void bit_writer_destroy(bit_writer_t *w) {
    free(w);
}
|
||||
|
||||
// Writes the `n` low-order bits of `val`, least significant bit first,
// one bit at a time through bit_writer_write_1().
void bit_writer_write(bit_writer_t *w, uint8_t val, unsigned int n) {
    size_t remaining = n;
    while (remaining > 0) {
        bit_writer_write_1(w, val);
        val >>= 1;  // expose the next bit in position 0
        remaining--;
    }
}
|
||||
|
||||
// Appends the lowest bit of `val` to the byte being assembled.
//
// While a byte is incomplete, current_byte is kept shifted left by one so
// that bit 0 is free for the next incoming bit. Once eight bits have been
// collected the byte is stored and the accumulator is cleared.
void bit_writer_write_1(bit_writer_t *w, uint8_t val) {
    w->current_byte |= val & 1;
    w->current_byte_len++;

    if (w->current_byte_len != 8) {
        // make room for the next bit
        w->current_byte <<= 1;
        return;
    }

    // 8 bits in a byte -- emit it and start the next one
    w->bytes[w->byte_index] = w->current_byte;
    w->byte_index++;
    w->current_byte_len = 0;
    w->current_byte = 0;
}
|
||||
|
||||
// Writes `len` bits taken from `l`, one bit per array element, in array
// order. Elements are OR-ed in unmasked, so each is expected to be 0 or 1.
//
// Works in three phases: top up the partially filled byte, emit whole
// bytes eight elements at a time, then stash the leftover bits in the
// accumulator (pre-shifted by one, as bit_writer_write_1 leaves it).
void bit_writer_write_bitlist(bit_writer_t *w, uint8_t *l, size_t len) {
    // phase 1: close the current byte -- we may not have enough bits for that
    size_t room = 8 - w->current_byte_len;
    size_t head = (room < len) ? room : len;

    // 16 bits wide because the final shift below pushes a full byte to 9 bits
    uint16_t acc = w->current_byte;
    for (size_t k = 0; k < head; k++) {
        acc |= l[k];
        acc <<= 1;
    }

    if (w->current_byte_len + head != 8) {
        // ran out of input before the byte was complete
        w->current_byte = acc;
        w->current_byte_len += head;
        return;
    }

    uint8_t *out = w->bytes;
    size_t pos = w->byte_index;

    acc >>= 1;  // undo the trailing pre-shift before storing
    out[pos] = acc;
    pos++;

    // phase 2: whole bytes
    const uint8_t *src = l + head;
    size_t rest = len - head;
    size_t whole = rest / 8;

    for (size_t k = 0; k < whole; k++) {
        out[pos] = src[0] << 7 | src[1] << 6 | src[2] << 5 |
                   src[3] << 4 | src[4] << 3 | src[5] << 2 |
                   src[6] << 1 | src[7];
        pos += 1;
        src += 8;
    }

    rest -= 8 * whole;

    // phase 3: leftover bits start the next byte
    acc = 0;
    for (size_t k = 0; k < rest; k++) {
        acc |= src[k];
        acc <<= 1;
    }

    w->current_byte = acc;
    w->byte_index = pos;
    w->current_byte_len = rest;
}
|
||||
|
||||
// Same as bit_writer_write_bitlist(), but consumes `l` from its last
// element towards its first, so the bits come out in reverse array order.
void bit_writer_write_bitlist_reversed(bit_writer_t *w, uint8_t *l, size_t len) {
    // start on the final element and walk backwards
    const uint8_t *src = l + len - 1;

    uint8_t *out = w->bytes;
    size_t pos = w->byte_index;
    uint16_t acc;

    if (w->current_byte_len != 0) {
        // top up the partially assembled byte first
        size_t room = 8 - w->current_byte_len;
        size_t head = (room < len) ? room : len;

        acc = w->current_byte;
        for (size_t k = 0; k < head; k++) {
            acc |= *src;
            acc <<= 1;
            src--;
        }

        len -= head;

        if (w->current_byte_len + head != 8) {
            // not enough input to finish the byte
            w->current_byte = acc;
            w->current_byte_len += head;
            return;
        }

        acc >>= 1;  // drop the pre-shift before storing
        out[pos] = acc;
        pos++;
    }

    // whole bytes, eight elements at a time
    size_t whole = len / 8;
    for (size_t k = 0; k < whole; k++) {
        out[pos] = src[0] << 7 | src[-1] << 6 | src[-2] << 5 |
                   src[-3] << 4 | src[-4] << 3 | src[-5] << 2 |
                   src[-6] << 1 | src[-7];
        pos += 1;
        src -= 8;
    }

    len -= 8 * whole;

    // leftover bits become the new partial byte
    acc = 0;
    for (size_t k = 0; k < len; k++) {
        acc |= *src;
        acc <<= 1;
        src--;
    }

    w->current_byte = (uint8_t)acc;
    w->byte_index = pos;
    w->current_byte_len = len;
}
|
||||
|
||||
// Emits the partially assembled byte, zero-padded on the right, and
// resets the accumulator. Does nothing when no bits are pending.
//
// The writers in this file keep current_byte shifted left by one while a
// byte is incomplete (bit 0 is reserved for the next incoming bit), so
// with `len` pending bits the data already sits in bits len..1. Aligning
// it to the top of the byte therefore needs a shift of 7 - len; the
// previous 8 - len shift pushed the oldest pending bit out of the byte.
void bit_writer_flush_byte(bit_writer_t *w) {
    if (w->current_byte_len != 0) {
        w->current_byte <<= (7 - w->current_byte_len);
        w->bytes[w->byte_index] = w->current_byte;
        w->byte_index++;
        w->current_byte_len = 0;
        // clear the accumulator so stale bits cannot leak into later writes
        w->current_byte = 0;
    }
}
|
||||
|
||||
// Number of complete bytes stored so far; bits still waiting in the
// accumulator are not counted.
size_t bit_writer_length(bit_writer_t *w) {
    size_t bytes_written = w->byte_index;
    return bytes_written;
}
|
||||
|
||||
// Returns `b` with its bit order mirrored (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
uint8_t reverse_byte(uint8_t b) {
    uint8_t mirrored = 0;
    for (int bit = 0; bit < 8; bit++) {
        // peel bits off the bottom of b and push them onto the bottom of
        // the result, so the first bit taken ends up on top
        mirrored = (uint8_t)((mirrored << 1) | (b & 1));
        b >>= 1;
    }
    return mirrored;
}
|
||||
|
||||
// Lookup table mapping every byte value to its bit-reversed counterpart.
static uint8_t reverse_table[256];

// Fills reverse_table by running reverse_byte() over all 256 byte values.
void create_reverse_table() {
    uint16_t value = 0;
    while (value < 256) {
        reverse_table[value] = reverse_byte(value);
        value++;
    }
}
|
||||
|
||||
// Allocates a zero-initialised bit reader and, on the first call, builds
// the shared bit-reversal lookup table used by bit_reader_read().
//
// If `bytes` is non-NULL the reader is positioned at the start of that
// buffer via bit_reader_reconfigure(); otherwise it must be reconfigured
// before the first read.
//
// Returns NULL if the allocation fails. Release with bit_reader_destroy().
bit_reader_t *bit_reader_create(const uint8_t *bytes, size_t len) {
    // one-time initialisation of the reversal table
    static bool reverse_table_created = false;

    if (!reverse_table_created) {
        create_reverse_table();
        reverse_table_created = true;
    }

    bit_reader_t *r = calloc(1, sizeof *r);
    if (!r) {
        // previously a failed allocation was dereferenced in reconfigure
        return NULL;
    }

    if (bytes) {
        bit_reader_reconfigure(r, bytes, len);
    }

    return r;
}
|
||||
|
||||
// Points the reader at a new input buffer of `len` bytes and preloads its
// first byte so that eight bits are immediately available.
//
// An empty buffer (NULL pointer or len == 0) is accepted: nothing is read
// from `bytes` and the preloaded byte is zero. Previously bytes[0] was
// read unconditionally, which is out of bounds for an empty buffer.
void bit_reader_reconfigure(bit_reader_t *r, const uint8_t *bytes, size_t len) {
    r->bytes = bytes;
    r->len = len;

    r->current_byte_len = 8;
    r->current_byte = (bytes && len > 0) ? bytes[0] : 0;
    r->byte_index = 0;
}
|
||||
|
||||
// Releases the reader object itself; the input buffer it points at is
// not freed here.
void bit_reader_destroy(bit_reader_t *r) {
    free(r);
}
|
||||
|
||||
// Reads the next `n` bits from the stream, most significant bit of each
// byte first, and returns them with their order reversed via
// reverse_table (the first bit read ends up in bit 0 of the result).
//
// The result passes through an 8-entry-wide table lookup and a shift by
// 8 - n, so this looks intended for n <= 8 only -- TODO confirm with callers.
// No check is made against the buffer length when advancing to the next byte.
uint8_t bit_reader_read(bit_reader_t *r, unsigned int n) {
    const unsigned int requested = n;
    unsigned int bits = 0;

    if (r->current_byte_len < n) {
        // the current byte cannot satisfy the request: take what is left,
        // then move on to the next byte for the remainder
        bits = r->current_byte & ((1 << r->current_byte_len) - 1);
        r->byte_index++;
        r->current_byte = r->bytes[r->byte_index];
        n -= r->current_byte_len;
        r->current_byte_len = 8;
        bits <<= n;
    }

    // select the top n unread bits of the current byte
    uint8_t mask = (1 << n) - 1;
    mask <<= (r->current_byte_len - n);
    bits |= (r->current_byte & mask) >> (r->current_byte_len - n);
    r->current_byte_len -= n;

    return reverse_table[bits] >> (8 - requested);
}
|
59
core/libcorrect/src/convolutional/convolutional.c
Normal file
59
core/libcorrect/src/convolutional/convolutional.c
Normal file
@ -0,0 +1,59 @@
|
||||
#include "correct/convolutional/convolutional.h"
|
||||
|
||||
// https://www.youtube.com/watch?v=b3_lVSrPB6w
|
||||
|
||||
correct_convolutional *_correct_convolutional_init(correct_convolutional *conv,
|
||||
size_t rate, size_t order,
|
||||
const polynomial_t *poly) {
|
||||
if (order > 8 * sizeof(shift_register_t)) {
|
||||
// XXX turn this into an error code
|
||||
// printf("order must be smaller than 8 * sizeof(shift_register_t)\n");
|
||||
return NULL;
|
||||
}
|
||||
if (rate < 2) {
|
||||
// XXX turn this into an error code
|
||||
// printf("rate must be 2 or greater\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conv->order = order;
|
||||
conv->rate = rate;
|
||||
conv->numstates = 1 << order;
|
||||
|
||||
unsigned int *table = malloc(sizeof(unsigned int) * (1 << order));
|
||||
fill_table(conv->rate, conv->order, poly, table);
|
||||
*(unsigned int **)&conv->table = table;
|
||||
|
||||
conv->bit_writer = bit_writer_create(NULL, 0);
|
||||
conv->bit_reader = bit_reader_create(NULL, 0);
|
||||
|
||||
conv->has_init_decode = false;
|
||||
return conv;
|
||||
}
|
||||
|
||||
correct_convolutional *correct_convolutional_create(size_t rate, size_t order,
|
||||
const polynomial_t *poly) {
|
||||
correct_convolutional *conv = malloc(sizeof(correct_convolutional));
|
||||
correct_convolutional *init_conv = _correct_convolutional_init(conv, rate, order, poly);
|
||||
if (!init_conv) {
|
||||
free(conv);
|
||||
}
|
||||
return init_conv;
|
||||
}
|
||||
|
||||
// Frees everything owned by the codec object, but not the object itself.
// Decoder-side state is only released if it was ever created.
void _correct_convolutional_teardown(correct_convolutional *conv) {
    if (conv->has_init_decode) {
        // lazily created decoder state
        pair_lookup_destroy(conv->pair_lookup);
        history_buffer_destroy(conv->history_buffer);
        error_buffer_destroy(conv->errors);
        free(conv->distances);
    }

    // state created by _correct_convolutional_init
    bit_reader_destroy(conv->bit_reader);
    bit_writer_destroy(conv->bit_writer);
    free(*(unsigned int **)&conv->table);
}
|
||||
|
||||
// Tears down a codec made by correct_convolutional_create() and frees it.
void correct_convolutional_destroy(correct_convolutional *conv) {
    // release owned resources first, then the object
    _correct_convolutional_teardown(conv);
    free(conv);
}
|
321
core/libcorrect/src/convolutional/decode.c
Normal file
321
core/libcorrect/src/convolutional/decode.c
Normal file
@ -0,0 +1,321 @@
|
||||
#include "correct/convolutional/convolutional.h"
|
||||
|
||||
// Debug helper: dumps the error metrics currently being written and the
// recorded history bits for the first numstates/2 states, covering history
// slices 0..winner_index. Prints nothing for iterations below 2220.
void conv_decode_print_iter(correct_convolutional *conv, unsigned int iter,
                            unsigned int winner_index) {
    if (iter < 2220) {
        return;
    }

    printf("iteration: %d\n", iter);

    distance_t *metrics = conv->errors->write_errors;
    printf("errors:\n");
    for (shift_register_t state = 0; state < conv->numstates / 2; state++) {
        printf("%2d: %d\n", state, metrics[state]);
    }
    printf("\n");

    printf("history:\n");
    for (shift_register_t state = 0; state < conv->numstates / 2; state++) {
        printf("%2d: ", state);
        // one digit per recorded time slice
        for (unsigned int slice = 0; slice <= winner_index; slice++) {
            printf("%d", conv->history_buffer->history[slice][state] ? 1 : 0);
        }
        printf("\n");
    }
    printf("\n");
}
|
||||
|
||||
// Warm-up phase of the decoder.
//
// Processes the first order - 1 symbol sets (or fewer if the input is
// shorter), while the shift register is still filling from zero. At step
// i only the first 2^(i+1) states are scored. Error metrics are
// accumulated; no history is recorded and no output bits are produced.
//
// `soft` selects soft-decision metrics when non-NULL; otherwise hard bits
// are pulled from the codec's bit reader.
void convolutional_decode_warmup(correct_convolutional *conv, unsigned int sets,
                                 const uint8_t *soft) {
    for (unsigned int i = 0; i < conv->order - 1 && i < sets; i++) {
        // peel off `rate` bits to recover the received version of the
        // encoder's `out` (channel errors included)
        unsigned int out;
        if (!soft) {
            out = bit_reader_read(conv->bit_reader, conv->rate);
        }

        const distance_t *prev = conv->errors->read_errors;
        distance_t *next = conv->errors->write_errors;

        // walk all of the states reachable so far
        for (size_t state = 0; state < (1 << (i + 1)); state += 1) {
            // the predecessor is the state with the newest bit removed
            unsigned int last = state >> 1;
            distance_t dist;

            if (!soft) {
                dist = metric_distance(conv->table[state], out);
            } else if (conv->soft_measurement == CORRECT_SOFT_LINEAR) {
                dist = metric_soft_distance_linear(conv->table[state], soft + i * conv->rate,
                                                   conv->rate);
            } else {
                dist = metric_soft_distance_quadratic(conv->table[state], soft + i * conv->rate,
                                                      conv->rate);
            }

            next[state] = dist + prev[last];
        }

        error_buffer_swap(conv->errors);
    }
}
|
||||
|
||||
// Main phase of the decoder: processes symbol sets order - 1 up to (but
// not including) sets - order + 1, i.e. everything between warm-up and
// the tail flush.
//
// For each set it scores every possible encoder output against the
// received symbols, then for every state picks the cheaper of its two
// predecessors, recording the accumulated error and a one-byte history
// flag (0 = predecessor with high bit clear, 1 = high bit set). The
// history buffer is then given a chance to renormalise / trace back.
//
// `soft` selects soft-decision metrics when non-NULL; otherwise hard bits
// are pulled from the codec's bit reader.
void convolutional_decode_inner(correct_convolutional *conv, unsigned int sets,
                                const uint8_t *soft) {
    // With fewer than order - 1 sets there is nothing for this phase to do.
    // Without this guard the unsigned loop bound `sets - order + 1` wraps
    // around and the loop runs far past the end of the input.
    if (sets < conv->order - 1) {
        return;
    }

    shift_register_t highbit = 1 << (conv->order - 1);
    for (unsigned int i = conv->order - 1; i < (sets - conv->order + 1); i++) {
        distance_t *distances = conv->distances;

        // distance from every possible `rate`-bit output to what we received
        if (soft) {
            if (conv->soft_measurement == CORRECT_SOFT_LINEAR) {
                for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
                    distances[j] =
                        metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate);
                }
            } else {
                for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
                    distances[j] =
                        metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate);
                }
            }
        } else {
            unsigned int out = bit_reader_read(conv->bit_reader, conv->rate);
            for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
                distances[j] = metric_distance(j, out);
            }
        }

        pair_lookup_t pair_lookup = conv->pair_lookup;
        pair_lookup_fill_distance(pair_lookup, distances);

        // one past the largest state index
        unsigned int num_iter = highbit << 1;

        // read_errors: aggregate errors per state for the previous time slice
        const distance_t *read_errors = conv->errors->read_errors;
        // write_errors: aggregate errors per state for this time slice
        distance_t *write_errors = conv->errors->write_errors;

        uint8_t *history = history_buffer_get_slice(conv->history_buffer);

        // Each inner iteration resolves two successor states that share the
        // same pair of predecessors:
        //
        //  - states that differ only in the lowest (newest) bit share their
        //    predecessors, because shifting right by one makes them equal
        //  - states that differ only in the highest (oldest) bit share their
        //    successors, because that bit is shifted out next
        //
        // The pair lookup yields the distances for both successors of a
        // predecessor packed in one value: low 16 bits for the successor with
        // newest bit 0, high 16 bits for the successor with newest bit 1.
        // The outer loop advances 8 successors (4 predecessors) at a time.
        shift_register_t highbase = highbit >> 1;
        for (shift_register_t low = 0, high = highbit, base = 0; high < num_iter;
             low += 8, high += 8, base += 4) {
            for (shift_register_t offset = 0, base_offset = 0; base_offset < 4;
                 offset += 2, base_offset += 1) {
                distance_pair_key_t low_key = pair_lookup.keys[base + base_offset];
                distance_pair_key_t high_key = pair_lookup.keys[highbase + base + base_offset];
                distance_pair_t low_concat_dist = pair_lookup.distances[low_key];
                distance_pair_t high_concat_dist = pair_lookup.distances[high_key];

                distance_t low_past_error = read_errors[base + base_offset];
                distance_t high_past_error = read_errors[highbase + base + base_offset];

                // successor with newest bit 0
                distance_t low_error = (low_concat_dist & 0xffff) + low_past_error;
                distance_t high_error = (high_concat_dist & 0xffff) + high_past_error;

                shift_register_t successor = low + offset;
                distance_t error;
                uint8_t history_mask;
                if (low_error <= high_error) {
                    error = low_error;
                    history_mask = 0;
                } else {
                    error = high_error;
                    history_mask = 1;
                }
                write_errors[successor] = error;
                history[successor] = history_mask;

                // successor with newest bit 1
                shift_register_t low_plus_one = low + offset + 1;

                distance_t low_plus_one_error = (low_concat_dist >> 16) + low_past_error;
                distance_t high_plus_one_error = (high_concat_dist >> 16) + high_past_error;

                shift_register_t plus_one_successor = low_plus_one;
                distance_t plus_one_error;
                uint8_t plus_one_history_mask;
                if (low_plus_one_error <= high_plus_one_error) {
                    plus_one_error = low_plus_one_error;
                    plus_one_history_mask = 0;
                } else {
                    plus_one_error = high_plus_one_error;
                    plus_one_history_mask = 1;
                }
                write_errors[plus_one_successor] = plus_one_error;
                history[plus_one_successor] = plus_one_history_mask;
            }
        }

        history_buffer_process(conv->history_buffer, write_errors, conv->bit_writer);
        error_buffer_swap(conv->errors);
    }
}
|
||||
|
||||
// Tail phase of the decoder: processes the last order - 1 symbol sets,
// during which the encoder only shifted in zeros. Successor states whose
// newest bits would be 1 are skipped, so the stride between considered
// states doubles with every step.
//
// `soft` selects soft-decision metrics when non-NULL; otherwise hard bits
// are pulled from the codec's bit reader.
void convolutional_decode_tail(correct_convolutional *conv, unsigned int sets,
                               const uint8_t *soft) {
    shift_register_t highbit = 1 << (conv->order - 1);

    for (unsigned int i = sets - conv->order + 1; i < sets; i++) {
        // aggregate errors per state: previous slice (read) and this slice (write)
        const distance_t *read_errors = conv->errors->read_errors;
        distance_t *write_errors = conv->errors->write_errors;

        uint8_t *history = history_buffer_get_slice(conv->history_buffer);

        // distance from every possible output to the received symbols
        distance_t *distances = conv->distances;
        if (soft) {
            for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
                if (conv->soft_measurement == CORRECT_SOFT_LINEAR) {
                    distances[j] =
                        metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate);
                } else {
                    distances[j] =
                        metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate);
                }
            }
        } else {
            unsigned int out = bit_reader_read(conv->bit_reader, conv->rate);
            for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
                distances[j] = metric_distance(j, out);
            }
        }

        const unsigned int *table = conv->table;

        // one past the largest state index
        unsigned int num_iter = highbit << 1;
        // stride between successors that are still reachable with zero input
        unsigned int skip = 1 << (conv->order - (sets - i));
        unsigned int base_skip = skip >> 1;

        shift_register_t highbase = highbit >> 1;
        shift_register_t low = 0;
        shift_register_t high = highbit;
        shift_register_t base = 0;
        while (high < num_iter) {
            // candidate coming from the predecessor with high bit clear
            distance_t low_past_error = read_errors[base];
            distance_t low_dist = distances[table[low]];
            distance_t low_error = low_dist + low_past_error;

            // candidate coming from the predecessor with high bit set
            distance_t high_past_error = read_errors[highbase + base];
            distance_t high_dist = distances[table[high]];
            distance_t high_error = high_dist + high_past_error;

            // ties go to the high predecessor here
            uint8_t took_high = (low_error < high_error) ? 0 : 1;
            write_errors[low] = took_high ? high_error : low_error;
            history[low] = took_high;

            low += skip;
            high += skip;
            base += base_skip;
        }

        history_buffer_process_skip(conv->history_buffer, write_errors, conv->bit_writer, skip);
        error_buffer_swap(conv->errors);
    }
}
|
||||
|
||||
// Creates the decoder-only state of the codec: the per-output distance
// scratch array, the pair lookup, the traceback history buffer and the
// double-buffered error metrics. Soft decoding defaults to the linear
// metric. Marks the codec so teardown knows to release this state.
void _convolutional_decode_init(correct_convolutional *conv, unsigned int min_traceback,
                                unsigned int traceback_length, unsigned int renormalize_interval) {
    conv->has_init_decode = true;
    conv->soft_measurement = CORRECT_SOFT_LINEAR;

    // one distance per possible `rate`-bit encoder output
    conv->distances = calloc(1 << (conv->rate), sizeof(distance_t));
    conv->pair_lookup = pair_lookup_create(conv->rate, conv->order, conv->table);

    // history depth is min_traceback + traceback_length slices
    conv->history_buffer =
        history_buffer_create(min_traceback, traceback_length, renormalize_interval,
                              conv->numstates / 2, 1 << (conv->order - 1));

    conv->errors = error_buffer_create(conv->numstates);
}
|
||||
|
||||
static ssize_t _convolutional_decode(correct_convolutional *conv, size_t num_encoded_bits,
|
||||
size_t num_encoded_bytes, uint8_t *msg,
|
||||
const soft_t *soft_encoded) {
|
||||
if (!conv->has_init_decode) {
|
||||
uint64_t max_error_per_input = conv->rate * soft_max;
|
||||
unsigned int renormalize_interval = distance_max / max_error_per_input;
|
||||
_convolutional_decode_init(conv, 5 * conv->order, 15 * conv->order, renormalize_interval);
|
||||
}
|
||||
|
||||
size_t sets = num_encoded_bits / conv->rate;
|
||||
// XXX fix this vvvvvv
|
||||
size_t decoded_len_bytes = num_encoded_bytes;
|
||||
bit_writer_reconfigure(conv->bit_writer, msg, decoded_len_bytes);
|
||||
|
||||
error_buffer_reset(conv->errors);
|
||||
history_buffer_reset(conv->history_buffer);
|
||||
|
||||
// no outputs are generated during warmup
|
||||
convolutional_decode_warmup(conv, sets, soft_encoded);
|
||||
convolutional_decode_inner(conv, sets, soft_encoded);
|
||||
convolutional_decode_tail(conv, sets, soft_encoded);
|
||||
|
||||
history_buffer_flush(conv->history_buffer, conv->bit_writer);
|
||||
|
||||
return bit_writer_length(conv->bit_writer);
|
||||
}
|
||||
|
||||
// perform viterbi decoding
|
||||
// hard decoder
|
||||
ssize_t correct_convolutional_decode(correct_convolutional *conv, const uint8_t *encoded,
|
||||
size_t num_encoded_bits, uint8_t *msg) {
|
||||
if (num_encoded_bits % conv->rate) {
|
||||
// XXX turn this into an error code
|
||||
// printf("encoded length of message must be a multiple of rate\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t num_encoded_bytes =
|
||||
(num_encoded_bits % 8) ? (num_encoded_bits / 8 + 1) : (num_encoded_bits / 8);
|
||||
bit_reader_reconfigure(conv->bit_reader, encoded, num_encoded_bytes);
|
||||
|
||||
return _convolutional_decode(conv, num_encoded_bits, num_encoded_bytes, msg, NULL);
|
||||
}
|
||||
|
||||
// Viterbi decoding, soft-decision variant.
//
// `encoded` is passed to the decoder as the soft-symbol array and is
// indexed per encoded bit; the bit count must be a multiple of the code
// rate. Decoded bytes are written to `msg`.
// Returns the number of bytes written, or -1 if the bit count is not a
// multiple of the rate.
ssize_t correct_convolutional_decode_soft(correct_convolutional *conv, const soft_t *encoded,
                                          size_t num_encoded_bits, uint8_t *msg) {
    if (num_encoded_bits % conv->rate) {
        // XXX turn this into an error code
        // printf("encoded length of message must be a multiple of rate\n");
        return -1;
    }

    // size the packed form would have, rounding a partial byte up
    size_t num_encoded_bytes = num_encoded_bits / 8;
    if (num_encoded_bits % 8) {
        num_encoded_bytes += 1;
    }

    return _convolutional_decode(conv, num_encoded_bits, num_encoded_bytes, msg, encoded);
}
|
61
core/libcorrect/src/convolutional/encode.c
Normal file
61
core/libcorrect/src/convolutional/encode.c
Normal file
@ -0,0 +1,61 @@
|
||||
#include "correct/convolutional/convolutional.h"
|
||||
|
||||
// Number of encoded *bits* produced for a message of msg_len bytes:
// `rate` output bits for every message bit plus order + 1 flush bits.
size_t correct_convolutional_encode_len(correct_convolutional *conv, size_t msg_len) {
    size_t input_bits = 8 * msg_len;
    size_t flush_bits = conv->order + 1;
    return conv->rate * (input_bits + flush_bits);
}
|
||||
|
||||
// shift in most significant bit every time, one byte at a time
|
||||
// shift register takes most recent bit on right, shifts left
|
||||
// poly is written in same order, just & mask message w/ poly
|
||||
|
||||
// assume that encoded length is long enough?
|
||||
size_t correct_convolutional_encode(correct_convolutional *conv,
|
||||
const uint8_t *msg,
|
||||
size_t msg_len,
|
||||
uint8_t *encoded) {
|
||||
// convolutional code convolves filter coefficients, given by
|
||||
// the polynomial, with some history from our message.
|
||||
// the history is stored as single subsequent bits in shiftregister
|
||||
shift_register_t shiftregister = 0;
|
||||
|
||||
// shiftmask is the shiftregister bit mask that removes bits
|
||||
// that extend beyond order
|
||||
// e.g. if order is 7, then remove the 8th bit and beyond
|
||||
unsigned int shiftmask = (1 << conv->order) - 1;
|
||||
|
||||
size_t encoded_len_bits = correct_convolutional_encode_len(conv, msg_len);
|
||||
size_t encoded_len = (encoded_len_bits % 8) ? (encoded_len_bits / 8 + 1) : (encoded_len_bits / 8);
|
||||
bit_writer_reconfigure(conv->bit_writer, encoded, encoded_len);
|
||||
|
||||
bit_reader_reconfigure(conv->bit_reader, msg, msg_len);
|
||||
|
||||
for (size_t i = 0; i < 8 * msg_len; i++) {
|
||||
// shiftregister has oldest bits on left, newest on right
|
||||
shiftregister <<= 1;
|
||||
shiftregister |= bit_reader_read(conv->bit_reader, 1);
|
||||
shiftregister &= shiftmask;
|
||||
// shift most significant bit from byte and move down one bit at a time
|
||||
|
||||
// we do direct lookup of our convolutional output here
|
||||
// all of the bits from this convolution are stored in this row
|
||||
unsigned int out = conv->table[shiftregister];
|
||||
bit_writer_write(conv->bit_writer, out, conv->rate);
|
||||
}
|
||||
|
||||
// now flush the shiftregister
|
||||
// this is simply running the loop as above but without any new inputs
|
||||
// or rather, the new input string is all 0s
|
||||
for (size_t i = 0; i < conv->order + 1; i++) {
|
||||
shiftregister <<= 1;
|
||||
shiftregister &= shiftmask;
|
||||
unsigned int out = conv->table[shiftregister];
|
||||
bit_writer_write(conv->bit_writer, out, conv->rate);
|
||||
}
|
||||
|
||||
// 0-fill any remaining bits on our final byte
|
||||
bit_writer_flush_byte(conv->bit_writer);
|
||||
|
||||
return encoded_len_bits;
|
||||
}
|
43
core/libcorrect/src/convolutional/error_buffer.c
Normal file
43
core/libcorrect/src/convolutional/error_buffer.c
Normal file
@ -0,0 +1,43 @@
|
||||
#include "correct/convolutional/error_buffer.h"
|
||||
|
||||
error_buffer_t *error_buffer_create(unsigned int num_states) {
|
||||
error_buffer_t *buf = calloc(1, sizeof(error_buffer_t));
|
||||
|
||||
// how large are the error buffers?
|
||||
buf->num_states = num_states;
|
||||
|
||||
// save two error metrics, one for last round and one for this
|
||||
// (double buffer)
|
||||
// the error metric is the aggregated number of bit errors found
|
||||
// at a given path which terminates at a particular shift register state
|
||||
buf->errors[0] = calloc(sizeof(distance_t), num_states);
|
||||
buf->errors[1] = calloc(sizeof(distance_t), num_states);
|
||||
|
||||
// which buffer are we using, 0 or 1?
|
||||
buf->index = 0;
|
||||
|
||||
buf->read_errors = buf->errors[0];
|
||||
buf->write_errors = buf->errors[1];
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Frees both metric arrays and then the buffer object.
void error_buffer_destroy(error_buffer_t *buf) {
    for (int which = 0; which < 2; which++) {
        free(buf->errors[which]);
    }
    free(buf);
}
|
||||
|
||||
// Zeroes both metric arrays and restores the initial roles:
// errors[0] is read from, errors[1] is written to.
void error_buffer_reset(error_buffer_t *buf) {
    const size_t nbytes = buf->num_states * sizeof(distance_t);

    for (int which = 0; which < 2; which++) {
        memset(buf->errors[which], 0, nbytes);
    }

    buf->index = 0;
    buf->read_errors = buf->errors[0];
    buf->write_errors = buf->errors[1];
}
|
||||
|
||||
// Exchanges the roles of the two metric arrays: what was just written
// becomes the data to read, and the other array becomes the write target.
//
// `index` always names the array currently being read. After create/reset
// it is 0 with errors[0] read and errors[1] written. The previous
// implementation assigned read_errors from the *old* index before
// toggling, so the first swap after a create/reset left both pointers
// unchanged and the metrics of the first time slice were never read back.
void error_buffer_swap(error_buffer_t *buf) {
    buf->index = (buf->index + 1) % 2;
    buf->read_errors = buf->errors[buf->index];
    buf->write_errors = buf->errors[(buf->index + 1) % 2];
}
|
158
core/libcorrect/src/convolutional/history_buffer.c
Normal file
158
core/libcorrect/src/convolutional/history_buffer.c
Normal file
@ -0,0 +1,158 @@
|
||||
#include "correct/convolutional/history_buffer.h"
|
||||
|
||||
history_buffer *history_buffer_create(unsigned int min_traceback_length,
|
||||
unsigned int traceback_group_length,
|
||||
unsigned int renormalize_interval, unsigned int num_states,
|
||||
shift_register_t highbit) {
|
||||
history_buffer *buf = calloc(1, sizeof(history_buffer));
|
||||
|
||||
*(unsigned int *)&buf->min_traceback_length = min_traceback_length;
|
||||
*(unsigned int *)&buf->traceback_group_length = traceback_group_length;
|
||||
*(unsigned int *)&buf->cap = min_traceback_length + traceback_group_length;
|
||||
*(unsigned int *)&buf->num_states = num_states;
|
||||
*(shift_register_t *)&buf->highbit = highbit;
|
||||
|
||||
buf->history = malloc(buf->cap * sizeof(uint8_t *));
|
||||
for (unsigned int i = 0; i < buf->cap; i++) {
|
||||
buf->history[i] = calloc(num_states, sizeof(uint8_t));
|
||||
}
|
||||
buf->fetched = malloc(buf->cap * sizeof(uint8_t));
|
||||
|
||||
buf->index = 0;
|
||||
buf->len = 0;
|
||||
|
||||
buf->renormalize_counter = 0;
|
||||
buf->renormalize_interval = renormalize_interval;
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Frees every history slice, the slice table, the traceback scratch
// array and finally the buffer object.
void history_buffer_destroy(history_buffer *buf) {
    unsigned int slice = 0;
    while (slice < buf->cap) {
        free(buf->history[slice]);
        slice++;
    }
    free(buf->history);
    free(buf->fetched);
    free(buf);
}
|
||||
|
||||
// Empties the ring buffer: write position and stored length go back to 0.
// The slice contents and the renormalize counter are left untouched.
void history_buffer_reset(history_buffer *buf) {
    buf->index = 0;
    buf->len = 0;
}
|
||||
|
||||
// Returns the history slice at the current write position.
uint8_t *history_buffer_get_slice(history_buffer *buf) {
    uint8_t *slice = buf->history[buf->index];
    return slice;
}
|
||||
|
||||
// Returns the state with the smallest accumulated error, looking only at
// every `search_every`-th state starting from state 0. On ties the lowest
// such state wins.
//
// If no examined state has an error below USHRT_MAX, state 0 is returned.
// Previously the result was an uninitialised variable in that case.
shift_register_t history_buffer_search(history_buffer *buf, const distance_t *distances,
                                       unsigned int search_every) {
    shift_register_t bestpath = 0;
    distance_t leasterror = USHRT_MAX;
    // search for a state with the least error
    for (shift_register_t state = 0; state < buf->num_states; state += search_every) {
        if (distances[state] < leasterror) {
            leasterror = distances[state];
            bestpath = state;
        }
    }
    return bestpath;
}
|
||||
|
||||
void history_buffer_renormalize(history_buffer *buf, distance_t *distances,
|
||||
shift_register_t min_register) {
|
||||
distance_t min_distance = distances[min_register];
|
||||
for (shift_register_t i = 0; i < buf->num_states; i++) {
|
||||
distances[i] -= min_distance;
|
||||
}
|
||||
}
|
||||
|
||||
// Walks the stored history backwards from the current write position,
// starting at state `bestpath`, and emits the decided bits.
//
// The newest min_traceback_length slices are only followed, not emitted.
// Every older slice still held (up to buf->len) contributes one bit,
// collected newest-first into buf->fetched and then written to `output`
// in reverse, i.e. oldest bit first. The emitted slices are removed from
// the buffer's length.
void history_buffer_traceback(history_buffer *buf, shift_register_t bestpath,
                              unsigned int min_traceback_length, bit_writer_t *output) {
    const shift_register_t highbit = buf->highbit;
    const unsigned int cap = buf->cap;
    unsigned int index = buf->index;
    unsigned int fetched_index = 0;

    // phase 1: follow the path without emitting anything
    for (unsigned int j = 0; j < min_traceback_length; j++) {
        // step one slice back, wrapping around the ring
        index = (index == 0) ? cap - 1 : index - 1;

        // going backwards, the recorded flag supplies the bit that was
        // shifted out: put it back on top and undo the shift
        uint8_t history = buf->history[index][bestpath];
        shift_register_t pathbit = history ? highbit : 0;
        bestpath |= pathbit;
        bestpath >>= 1;
    }

    // phase 2: keep walking, now recording each decided bit
    unsigned int prefetch_index = (index == 0) ? cap - 1 : index - 1;
    const unsigned int len = buf->len;
    for (unsigned int j = min_traceback_length; j < len; j++) {
        index = prefetch_index;
        prefetch_index = (prefetch_index == 0) ? cap - 1 : prefetch_index - 1;
        // warm the cache for the slice we will need next
        prefetch(buf->history[prefetch_index]);

        uint8_t history = buf->history[index][bestpath];
        shift_register_t pathbit = history ? highbit : 0;
        bestpath |= pathbit;
        bestpath >>= 1;

        buf->fetched[fetched_index] = (pathbit ? 1 : 0);
        fetched_index++;
    }

    bit_writer_write_bitlist_reversed(output, buf->fetched, fetched_index);
    buf->len -= fetched_index;
}
|
||||
|
||||
/*
 * Account for one freshly written history slice and, when due, renormalize
 * the path metrics and/or trace back to emit decoded bits.
 *
 * Renormalization happens every buf->renormalize_interval calls; a traceback
 * happens whenever the buffer is full (buf->len == buf->cap). When both are
 * due in the same call, the best path found for renormalization is reused for
 * the traceback so the search runs only once.
 */
void history_buffer_process_skip(history_buffer *buf, distance_t *distances, bit_writer_t *output,
                                 unsigned int skip) {
    // advance the write position, wrapping at capacity
    if (++buf->index == buf->cap) {
        buf->index = 0;
    }
    buf->renormalize_counter++;
    buf->len++;

    if (buf->renormalize_counter != buf->renormalize_interval) {
        // no renormalization this round; only search if a traceback is needed
        if (buf->len == buf->cap) {
            const shift_register_t best = history_buffer_search(buf, distances, skip);
            history_buffer_traceback(buf, best, buf->min_traceback_length, output);
        }
        return;
    }

    // renormalization round
    buf->renormalize_counter = 0;
    const shift_register_t best = history_buffer_search(buf, distances, skip);
    history_buffer_renormalize(buf, distances, best);
    if (buf->len == buf->cap) {
        history_buffer_traceback(buf, best, buf->min_traceback_length, output);
    }
}
|
||||
|
||||
// Convenience wrapper: forwards to history_buffer_process_skip with skip = 1.
void history_buffer_process(history_buffer *buf, distance_t *distances, bit_writer_t *output) {
|
||||
history_buffer_process_skip(buf, distances, output, 1);
|
||||
}
|
||||
|
||||
// Emit every bit still held in the history buffer: traces back starting from
// register state 0 with a minimum traceback length of 0, so no slice is held
// back.
void history_buffer_flush(history_buffer *buf, bit_writer_t *output) {
|
||||
history_buffer_traceback(buf, 0, 0, output);
|
||||
}
|
74
core/libcorrect/src/convolutional/lookup.c
Normal file
74
core/libcorrect/src/convolutional/lookup.c
Normal file
@ -0,0 +1,74 @@
|
||||
#include "correct/convolutional/lookup.h"
|
||||
|
||||
// table has numstates rows
|
||||
// each row contains all of the polynomial output bits concatenated together
|
||||
// e.g. for rate 2, we have 2 bits in each row
|
||||
// the first poly gets the LEAST significant bit, last poly gets most significant
|
||||
void fill_table(unsigned int rate,
|
||||
unsigned int order,
|
||||
const polynomial_t *poly,
|
||||
unsigned int *table) {
|
||||
for (shift_register_t i = 0; i < 1 << order; i++) {
|
||||
unsigned int out = 0;
|
||||
unsigned int mask = 1;
|
||||
for (size_t j = 0; j < rate; j++) {
|
||||
out |= (popcount(i & poly[j]) % 2) ? mask : 0;
|
||||
mask <<= 1;
|
||||
}
|
||||
table[i] = out;
|
||||
}
|
||||
}
|
||||
|
||||
pair_lookup_t pair_lookup_create(unsigned int rate,
|
||||
unsigned int order,
|
||||
const unsigned int *table) {
|
||||
pair_lookup_t pairs;
|
||||
|
||||
pairs.keys = malloc(sizeof(unsigned int) * (1 << (order - 1)));
|
||||
pairs.outputs = calloc((1 << (rate * 2)), sizeof(unsigned int));
|
||||
unsigned int *inv_outputs = calloc((1 << (rate * 2)), sizeof(unsigned int));
|
||||
unsigned int output_counter = 1;
|
||||
// for every (even-numbered) shift register state, find the concatenated output of the state
|
||||
// and the subsequent state that follows it (low bit set). then, check to see if this
|
||||
// concatenated output has a unique key assigned to it already. if not, give it a key.
|
||||
// if it does, retrieve the key. assign this key to the shift register state.
|
||||
for (unsigned int i = 0; i < (1 << (order - 1)); i++) {
|
||||
// first get the concatenated pair of outputs
|
||||
unsigned int out = table[i * 2 + 1];
|
||||
out <<= rate;
|
||||
out |= table[i * 2];
|
||||
|
||||
// does this concatenated output exist in the outputs table yet?
|
||||
if (!inv_outputs[out]) {
|
||||
// doesn't exist, allocate a new key
|
||||
inv_outputs[out] = output_counter;
|
||||
pairs.outputs[output_counter] = out;
|
||||
output_counter++;
|
||||
}
|
||||
// set the opaque key for the ith shift register state to the concatenated output entry
|
||||
pairs.keys[i] = inv_outputs[out];
|
||||
}
|
||||
pairs.outputs_len = output_counter;
|
||||
pairs.output_mask = (1 << (rate)) - 1;
|
||||
pairs.output_width = rate;
|
||||
pairs.distances = calloc(pairs.outputs_len, sizeof(distance_pair_t));
|
||||
free(inv_outputs);
|
||||
return pairs;
|
||||
}
|
||||
|
||||
// Release the three heap arrays owned by a pair lookup (keys, outputs,
// distances). The struct is passed by value, so the caller's copy still holds
// the now-dangling pointers afterwards.
void pair_lookup_destroy(pair_lookup_t pairs) {
|
||||
free(pairs.keys);
|
||||
free(pairs.outputs);
|
||||
free(pairs.distances);
|
||||
}
|
||||
|
||||
// Refresh the per-key packed distances for the current time slice.
// For every assigned key (keys start at 1) the stored concatenated output is
// split back into its two `output_width`-bit halves; the distance of the low
// half goes into bits 0..15 and the distance of the high half into bits 16..31
// of pairs.distances[key].
void pair_lookup_fill_distance(pair_lookup_t pairs, distance_t *distances) {
    for (unsigned int i = 1; i < pairs.outputs_len; i += 1) {
        output_pair_t concat_out = pairs.outputs[i];
        unsigned int i_0 = concat_out & pairs.output_mask;
        concat_out >>= pairs.output_width;
        unsigned int i_1 = concat_out;

        // widen before shifting: a 16-bit distance is promoted to (signed) int,
        // and shifting a value >= 0x8000 left by 16 would overflow it
        pairs.distances[i] = ((uint32_t)distances[i_1] << 16) | distances[i_0];
    }
}
|
17
core/libcorrect/src/convolutional/metric.c
Normal file
17
core/libcorrect/src/convolutional/metric.c
Normal file
@ -0,0 +1,17 @@
|
||||
#include "correct/convolutional/metric.h"
|
||||
|
||||
// measure the square of the euclidean distance between x and y
|
||||
// since euclidean dist is sqrt(a^2 + b^2 + ... + n^2), the square is just
|
||||
// a^2 + b^2 + ... + n^2
|
||||
distance_t metric_soft_distance_quadratic(unsigned int hard_x, const uint8_t *soft_y, size_t len) {
|
||||
distance_t dist = 0;
|
||||
for (unsigned int i = 0; i < len; i++) {
|
||||
// first, convert hard_x to a soft measurement (0 -> 0, 1 - > 255)
|
||||
unsigned int soft_x = (hard_x & 1) ? 255 : 0;
|
||||
hard_x >>= 1;
|
||||
int d = soft_y[i] - soft_x;
|
||||
dist += d*d;
|
||||
}
|
||||
return dist >> 3;
|
||||
}
|
||||
|
2
core/libcorrect/src/convolutional/sse/CMakeLists.txt
Normal file
2
core/libcorrect/src/convolutional/sse/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
# Sources of the SSE variant of the convolutional codec.
set(SRCFILES lookup.c convolutional.c encode.c decode.c)
|
||||
# Compile them as an OBJECT library (object files only, no archive is produced).
add_library(correct-convolutional-sse OBJECT ${SRCFILES})
|
21
core/libcorrect/src/convolutional/sse/convolutional.c
Normal file
21
core/libcorrect/src/convolutional/sse/convolutional.c
Normal file
@ -0,0 +1,21 @@
|
||||
#include "correct/convolutional/sse/convolutional.h"
|
||||
|
||||
correct_convolutional_sse *correct_convolutional_sse_create(size_t rate,
|
||||
size_t order,
|
||||
const polynomial_t *poly) {
|
||||
correct_convolutional_sse *conv = malloc(sizeof(correct_convolutional_sse));
|
||||
correct_convolutional *init_conv = _correct_convolutional_init(&conv->base_conv, rate, order, poly);
|
||||
if (!init_conv) {
|
||||
free(conv);
|
||||
conv = NULL;
|
||||
}
|
||||
return conv;
|
||||
}
|
||||
|
||||
// Release an SSE convolutional codec created by
// correct_convolutional_sse_create. Passing NULL is a no-op, mirroring free(),
// because create can return NULL.
void correct_convolutional_sse_destroy(correct_convolutional_sse *conv) {
    if (!conv) {
        return;
    }
    // the oct lookup only exists once the decoder has been initialised
    if (conv->base_conv.has_init_decode) {
        oct_lookup_destroy(conv->oct_lookup);
    }
    _correct_convolutional_teardown(&conv->base_conv);
    free(conv);
}
|
319
core/libcorrect/src/convolutional/sse/decode.c
Normal file
319
core/libcorrect/src/convolutional/sse/decode.c
Normal file
@ -0,0 +1,319 @@
|
||||
#include "correct/convolutional/sse/convolutional.h"
|
||||
|
||||
// Main (steady-state) loop of the SSE decoder.
//
// For every set index i in [order - 1, sets - order + 1) this:
//   1. fills conv->distances with the distance of each possible output symbol
//      (soft linear / soft quadratic metric when `soft` is non-NULL, otherwise
//      a symbol is read from conv->bit_reader and metric_distance is used),
//   2. expands those into the oct lookup's packed distances,
//   3. runs the SIMD add-compare-select loop: past errors are read from
//      conv->errors->read_errors, the new minimum errors are stored to
//      conv->errors->write_errors and one decision byte per state is stored
//      into the current history slice,
//   4. updates the history buffer bookkeeping and swaps the error buffers.
//
// The history buffer's index/len/renormalize counter are mirrored in locals
// and written back to the struct only when history_buffer_process must run
// and once more at the end.
static void convolutional_sse_decode_inner(correct_convolutional_sse *sse_conv, unsigned int sets,
|
||||
const uint8_t *soft) {
|
||||
correct_convolutional *conv = &sse_conv->base_conv;
|
||||
shift_register_t highbit = 1 << (conv->order - 1);
|
||||
unsigned int hist_buf_index = conv->history_buffer->index;
|
||||
unsigned int hist_buf_cap = conv->history_buffer->cap;
|
||||
unsigned int hist_buf_len = conv->history_buffer->len;
|
||||
unsigned int hist_buf_rn_int = conv->history_buffer->renormalize_interval;
|
||||
unsigned int hist_buf_rn_cnt = conv->history_buffer->renormalize_counter;
|
||||
// NOTE(review): if the arithmetic here is unsigned, `sets - conv->order + 1`
// wraps when sets < order - 1; presumably callers never pass such short
// inputs -- confirm.
for (unsigned int i = conv->order - 1; i < (sets - conv->order + 1); i++) {
|
||||
distance_t *distances = conv->distances;
|
||||
// lasterrors are the aggregate bit errors for the states of
|
||||
// shiftregister for the previous time slice
|
||||
if (soft) {
|
||||
if (conv->soft_measurement == CORRECT_SOFT_LINEAR) {
|
||||
for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
|
||||
distances[j] =
|
||||
metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate);
|
||||
}
|
||||
} else {
|
||||
for (unsigned int j = 0; j < 1 << (conv->rate); j++) {
|
||||
distances[j] =
|
||||
metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unsigned int out = bit_reader_read(conv->bit_reader, conv->rate);
|
||||
// NOTE(review): this `i` shadows the outer loop variable; harmless because
// the outer `i` is not used inside this branch.
for (unsigned int i = 0; i < 1 << (conv->rate); i++) {
|
||||
distances[i] = metric_distance(i, out);
|
||||
}
|
||||
}
|
||||
oct_lookup_t oct_lookup = sse_conv->oct_lookup;
|
||||
oct_lookup_fill_distance(oct_lookup, distances);
|
||||
|
||||
// upper bound for `high` in the state loop below (twice highbit)
|
||||
unsigned int num_iter = highbit << 1;
|
||||
const distance_t *read_errors = conv->errors->read_errors;
|
||||
// aggregate bit errors for this time slice
|
||||
distance_t *write_errors = conv->errors->write_errors;
|
||||
|
||||
uint8_t *history = conv->history_buffer->history[hist_buf_index];
|
||||
;
|
||||
// walk through all states, ignoring oldest bit
|
||||
// we will track a best register state (path) and the number of bit
|
||||
// errors at that path at this time slice
|
||||
// this loop considers two paths per iteration (high order bit set,
|
||||
// clear)
|
||||
// so, it only runs numstates/2 iterations
|
||||
// we'll update the history for every state and find the path with the
|
||||
// least aggregated bit errors
|
||||
|
||||
// now run the main loop
|
||||
// we calculate 2 sets of 2 register states here (4 states per iter)
|
||||
// this creates 2 sets which share a predecessor, and 2 sets which share
|
||||
// a successor
|
||||
//
|
||||
// the first set definition is the two states that are the same except
|
||||
// for the least order bit
|
||||
// these two share a predecessor because their high n - 1 bits are the
|
||||
// same (differ only by newest bit)
|
||||
//
|
||||
// the second set definition is the two states that are the same except
|
||||
// for the high order bit
|
||||
// these two share a successor because the oldest high order bit will be
|
||||
// shifted out, and the other bits will be present in the successor
|
||||
//
|
||||
shift_register_t highbase = highbit >> 1;
|
||||
shift_register_t oct_highbase = highbase >> 2;
|
||||
for (shift_register_t low = 0, high = highbit, base = 0, oct = 0; high < num_iter;
|
||||
low += 32, high += 32, base += 16, oct += 4) {
|
||||
// shifted-right ancestors
|
||||
// low and low_plus_one share low_past_error
|
||||
// note that they are the same when shifted right by 1
|
||||
// same goes for high and high_plus_one
|
||||
// byte shuffle that duplicates each of the four loaded 16-bit past errors
// into two adjacent lanes
__m128i past_shuffle_mask =
|
||||
_mm_set_epi32(0x07060706, 0x05040504, 0x03020302, 0x01000100);
|
||||
// byte shuffle that keeps one byte of each 16-bit compare result in the low
// 8 bytes and zeroes the upper 8 bytes (0x80 selectors)
__m128i hist_mask =
|
||||
_mm_set_epi32(0x80808080, 0x80808080, 0x0e0c0a09, 0x07050301);
|
||||
|
||||
// the loop below calculates 64 register states per loop iteration
|
||||
// it does this by packing the 128-bit xmm registers with 8, 16-bit
|
||||
// distances
|
||||
// 4 of these registers hold distances for convolutional shift
|
||||
// register states with the high bit cleared
|
||||
// and 4 hold distances for the corresponding shift register
|
||||
// states with the high bit set
|
||||
// since each xmm register holds 8 distances, this adds up to a
|
||||
// total of 8 * 8 = 64 shift register states
|
||||
// NOTE: with base_offset stepping by 16 and the bound being 16, this body
// executes exactly once per outer iteration.
for (shift_register_t offset = 0, base_offset = 0; base_offset < 16;
|
||||
offset += 32, base_offset += 16) {
|
||||
// load the past error for the register states with the high
|
||||
// order bit cleared
|
||||
__m128i low_past_error =
|
||||
_mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset));
|
||||
__m128i low_past_error0 =
|
||||
_mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 4));
|
||||
__m128i low_past_error1 =
|
||||
_mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 8));
|
||||
__m128i low_past_error2 =
|
||||
_mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 12));
|
||||
|
||||
// shuffle the low past error
|
||||
// register states that differ only by their low order bit share
|
||||
// a past error
|
||||
low_past_error = _mm_shuffle_epi8(low_past_error, past_shuffle_mask);
|
||||
low_past_error0 = _mm_shuffle_epi8(low_past_error0, past_shuffle_mask);
|
||||
low_past_error1 = _mm_shuffle_epi8(low_past_error1, past_shuffle_mask);
|
||||
low_past_error2 = _mm_shuffle_epi8(low_past_error2, past_shuffle_mask);
|
||||
|
||||
// repeat past error lookup for register states with high order
|
||||
// bit set
|
||||
__m128i high_past_error =
|
||||
_mm_loadl_epi64((const __m128i *)(read_errors + highbase + base + base_offset));
|
||||
__m128i high_past_error0 = _mm_loadl_epi64(
|
||||
(const __m128i *)(read_errors + highbase + base + base_offset + 4));
|
||||
__m128i high_past_error1 = _mm_loadl_epi64(
|
||||
(const __m128i *)(read_errors + highbase + base + base_offset + 8));
|
||||
__m128i high_past_error2 = _mm_loadl_epi64(
|
||||
(const __m128i *)(read_errors + highbase + base + base_offset + 12));
|
||||
|
||||
high_past_error = _mm_shuffle_epi8(high_past_error, past_shuffle_mask);
|
||||
high_past_error0 = _mm_shuffle_epi8(high_past_error0, past_shuffle_mask);
|
||||
high_past_error1 = _mm_shuffle_epi8(high_past_error1, past_shuffle_mask);
|
||||
high_past_error2 = _mm_shuffle_epi8(high_past_error2, past_shuffle_mask);
|
||||
|
||||
// __m128i this_shuffle_mask = (__m128i){0x80800100, 0x80800302,
|
||||
// 0x80800504, 0x80800706};
|
||||
|
||||
// load the opaque oct distance table keys for our loop index
|
||||
distance_oct_key_t low_key = oct_lookup.keys[oct + (base_offset / 4)];
|
||||
distance_oct_key_t low_key0 = oct_lookup.keys[oct + (base_offset / 4) + 1];
|
||||
distance_oct_key_t low_key1 = oct_lookup.keys[oct + (base_offset / 4) + 2];
|
||||
distance_oct_key_t low_key2 = oct_lookup.keys[oct + (base_offset / 4) + 3];
|
||||
|
||||
// load the distances for the register states with high order
|
||||
// bit cleared
|
||||
// NOTE(review): _mm_load_si128 requires 16-byte aligned addresses; this
// relies on the alignment of the oct_lookup.distances allocation -- confirm.
__m128i low_this_error =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + low_key));
|
||||
__m128i low_this_error0 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + low_key0));
|
||||
__m128i low_this_error1 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + low_key1));
|
||||
__m128i low_this_error2 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + low_key2));
|
||||
|
||||
// add the distance for this time slice to the past distances
|
||||
__m128i low_error = _mm_add_epi16(low_past_error, low_this_error);
|
||||
__m128i low_error0 = _mm_add_epi16(low_past_error0, low_this_error0);
|
||||
__m128i low_error1 = _mm_add_epi16(low_past_error1, low_this_error1);
|
||||
__m128i low_error2 = _mm_add_epi16(low_past_error2, low_this_error2);
|
||||
|
||||
// repeat oct distance table lookup for registers with high
|
||||
// order bit set
|
||||
distance_oct_key_t high_key =
|
||||
oct_lookup.keys[oct_highbase + oct + (base_offset / 4)];
|
||||
distance_oct_key_t high_key0 =
|
||||
oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 1];
|
||||
distance_oct_key_t high_key1 =
|
||||
oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 2];
|
||||
distance_oct_key_t high_key2 =
|
||||
oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 3];
|
||||
|
||||
__m128i high_this_error =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + high_key));
|
||||
__m128i high_this_error0 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + high_key0));
|
||||
__m128i high_this_error1 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + high_key1));
|
||||
__m128i high_this_error2 =
|
||||
_mm_load_si128((const __m128i *)(oct_lookup.distances + high_key2));
|
||||
|
||||
__m128i high_error = _mm_add_epi16(high_past_error, high_this_error);
|
||||
__m128i high_error0 = _mm_add_epi16(high_past_error0, high_this_error0);
|
||||
__m128i high_error1 = _mm_add_epi16(high_past_error1, high_this_error1);
|
||||
__m128i high_error2 = _mm_add_epi16(high_past_error2, high_this_error2);
|
||||
|
||||
// distances for this time slice calculated
|
||||
|
||||
// find the least error between registers who differ only in
|
||||
// their high order bit
|
||||
__m128i min_error = _mm_min_epu16(low_error, high_error);
|
||||
__m128i min_error0 = _mm_min_epu16(low_error0, high_error0);
|
||||
__m128i min_error1 = _mm_min_epu16(low_error1, high_error1);
|
||||
__m128i min_error2 = _mm_min_epu16(low_error2, high_error2);
|
||||
|
||||
// write the least error so that the next time slice sees it as the past error
_mm_store_si128((__m128i *)(write_errors + low + offset), min_error);
|
||||
_mm_store_si128((__m128i *)(write_errors + low + offset + 8), min_error0);
|
||||
_mm_store_si128((__m128i *)(write_errors + low + offset + 16), min_error1);
|
||||
_mm_store_si128((__m128i *)(write_errors + low + offset + 24), min_error2);
|
||||
|
||||
// generate history bits as (low_error > least_error)
|
||||
// this operation fills each element with all 1s if true and 0s
|
||||
// if false
|
||||
// in other words, we set the history bit to 1 if
|
||||
// the register state with high order bit set was the least
|
||||
// error
|
||||
// NOTE: the minimum above is unsigned (_mm_min_epu16) while this compare is
// signed (_mm_cmpgt_epi16); the two agree only while errors stay below 0x8000.
__m128i hist = _mm_cmpgt_epi16(low_error, min_error);
|
||||
// pack the bits down from 16-bit wide to 8-bit wide to
|
||||
// accommodate history table
|
||||
hist = _mm_shuffle_epi8(hist, hist_mask);
|
||||
|
||||
__m128i hist0 = _mm_cmpgt_epi16(low_error0, min_error0);
|
||||
hist0 = _mm_shuffle_epi8(hist0, hist_mask);
|
||||
|
||||
__m128i hist1 = _mm_cmpgt_epi16(low_error1, min_error1);
|
||||
hist1 = _mm_shuffle_epi8(hist1, hist_mask);
|
||||
|
||||
__m128i hist2 = _mm_cmpgt_epi16(low_error2, min_error2);
|
||||
hist2 = _mm_shuffle_epi8(hist2, hist_mask);
|
||||
|
||||
// (the least errors were already written to write_errors above)
|
||||
// store the history bytes produced by the cmp and shuffle operations;
|
||||
// each stored byte is 0xFF or 0x00
|
||||
_mm_storel_epi64((__m128i *)(history + low + offset), hist);
|
||||
_mm_storel_epi64((__m128i *)(history + low + offset + 8), hist0);
|
||||
_mm_storel_epi64((__m128i *)(history + low + offset + 16), hist1);
|
||||
_mm_storel_epi64((__m128i *)(history + low + offset + 24), hist2);
|
||||
}
|
||||
}
|
||||
|
||||
// bypass the call to history buffer
|
||||
// we should really make that function inline and remove this below
|
||||
// history_buffer_process only needs to run when the buffer is about to become
// full or the renormalize counter is about to reach its interval
if (hist_buf_len == hist_buf_cap - 1 || hist_buf_rn_cnt == hist_buf_rn_int - 1) {
|
||||
// restore hist buffer state and invoke it
|
||||
conv->history_buffer->len = hist_buf_len;
|
||||
conv->history_buffer->index = hist_buf_index;
|
||||
conv->history_buffer->renormalize_counter = hist_buf_rn_cnt;
|
||||
history_buffer_process(conv->history_buffer, write_errors, conv->bit_writer);
|
||||
// restore our local values
|
||||
hist_buf_len = conv->history_buffer->len;
|
||||
hist_buf_index = conv->history_buffer->index;
|
||||
hist_buf_cap = conv->history_buffer->cap;
|
||||
hist_buf_rn_cnt = conv->history_buffer->renormalize_counter;
|
||||
} else {
|
||||
hist_buf_len++;
|
||||
hist_buf_index++;
|
||||
if (hist_buf_index == hist_buf_cap) {
|
||||
hist_buf_index = 0;
|
||||
}
|
||||
hist_buf_rn_cnt++;
|
||||
}
|
||||
error_buffer_swap(conv->errors);
|
||||
}
|
||||
// publish the locally tracked bookkeeping back to the history buffer
conv->history_buffer->len = hist_buf_len;
|
||||
conv->history_buffer->index = hist_buf_index;
|
||||
conv->history_buffer->renormalize_counter = hist_buf_rn_cnt;
|
||||
}
|
||||
|
||||
// One-time decoder setup for the SSE codec: runs the base decoder
// initialisation and then builds the oct lookup from the base codec's rate,
// order and output table.
static void _convolutional_sse_decode_init(correct_convolutional_sse *conv,
                                           unsigned int min_traceback,
                                           unsigned int traceback_length,
                                           unsigned int renormalize_interval) {
    correct_convolutional *base = &conv->base_conv;

    _convolutional_decode_init(base, min_traceback, traceback_length, renormalize_interval);
    conv->oct_lookup = oct_lookup_create(base->rate, base->order, base->table);
}
|
||||
|
||||
// Shared implementation behind the hard and soft SSE decode entry points.
// Lazily initialises the decoder on first use, resets the error and history
// buffers, then runs warmup -> SSE inner loop -> tail and flushes the history
// buffer. `soft_encoded` is NULL for hard-decision decoding (input then comes
// from the bit reader configured by the caller).
// Returns the value reported by bit_writer_length for the output written to
// `msg`.
static ssize_t _convolutional_sse_decode(correct_convolutional_sse *sse_conv,
                                         size_t num_encoded_bits, size_t num_encoded_bytes,
                                         uint8_t *msg, const soft_t *soft_encoded) {
    correct_convolutional *base = &sse_conv->base_conv;

    if (!base->has_init_decode) {
        const uint64_t worst_error_per_input = base->rate * soft_max;
        // the sse implementation unfortunately uses signed math on our unsigned
        // values, which halves the usable distance range
        const unsigned int interval = (distance_max / 2) / worst_error_per_input;
        _convolutional_sse_decode_init(sse_conv, 5 * base->order, 100 * base->order, interval);
    }

    const size_t num_sets = num_encoded_bits / base->rate;

    // XXX fix this: the output buffer is sized as if the decoded message were
    // as long as the encoded input
    bit_writer_reconfigure(base->bit_writer, msg, num_encoded_bytes);

    error_buffer_reset(base->errors);
    history_buffer_reset(base->history_buffer);

    // no outputs are generated during warmup
    convolutional_decode_warmup(base, num_sets, soft_encoded);
    convolutional_sse_decode_inner(sse_conv, num_sets, soft_encoded);
    convolutional_decode_tail(base, num_sets, soft_encoded);

    history_buffer_flush(base->history_buffer, base->bit_writer);

    return bit_writer_length(base->bit_writer);
}
|
||||
|
||||
// Hard-decision decode of `num_encoded_bits` bits from `encoded` into `msg`.
// Returns -1 when the bit count is not a multiple of the code rate (XXX: this
// should become a proper error code); otherwise returns the result of the
// shared decode routine.
ssize_t correct_convolutional_sse_decode(correct_convolutional_sse *conv, const uint8_t *encoded,
                                         size_t num_encoded_bits, uint8_t *msg) {
    if (num_encoded_bits % conv->base_conv.rate != 0) {
        return -1;
    }

    // whole bytes, plus one more for a trailing partial byte
    size_t num_encoded_bytes = num_encoded_bits / 8;
    if (num_encoded_bits % 8) {
        num_encoded_bytes++;
    }

    bit_reader_reconfigure(conv->base_conv.bit_reader, encoded, num_encoded_bytes);
    return _convolutional_sse_decode(conv, num_encoded_bits, num_encoded_bytes, msg, NULL);
}
|
||||
|
||||
// Soft-decision decode: `encoded` holds one soft value per encoded bit.
// Returns -1 when the bit count is not a multiple of the code rate (XXX: this
// should become a proper error code); otherwise returns the result of the
// shared decode routine.
ssize_t correct_convolutional_sse_decode_soft(correct_convolutional_sse *conv, const soft_t *encoded,
                                              size_t num_encoded_bits, uint8_t *msg) {
    if (num_encoded_bits % conv->base_conv.rate != 0) {
        return -1;
    }

    // whole bytes, plus one more for a trailing partial byte
    size_t num_encoded_bytes = num_encoded_bits / 8;
    if (num_encoded_bits % 8) {
        num_encoded_bytes++;
    }

    return _convolutional_sse_decode(conv, num_encoded_bits, num_encoded_bytes, msg, encoded);
}
|
9
core/libcorrect/src/convolutional/sse/encode.c
Normal file
9
core/libcorrect/src/convolutional/sse/encode.c
Normal file
@ -0,0 +1,9 @@
|
||||
#include "correct/convolutional/sse/convolutional.h"
|
||||
|
||||
// Encoded length for a message of msg_len; delegates to the base codec's
// correct_convolutional_encode_len.
size_t correct_convolutional_sse_encode_len(correct_convolutional_sse *conv, size_t msg_len) {
|
||||
return correct_convolutional_encode_len(&conv->base_conv, msg_len);
|
||||
}
|
||||
|
||||
// Encoding has no SSE-specific path here: this forwards to the base codec's
// correct_convolutional_encode and returns its result.
size_t correct_convolutional_sse_encode(correct_convolutional_sse *conv, const uint8_t *msg, size_t msg_len, uint8_t *encoded) {
|
||||
return correct_convolutional_encode(&conv->base_conv, msg, msg_len, encoded);
|
||||
}
|
183
core/libcorrect/src/convolutional/sse/lookup.c
Normal file
183
core/libcorrect/src/convolutional/sse/lookup.c
Normal file
@ -0,0 +1,183 @@
|
||||
#include "correct/convolutional/sse/lookup.h"
|
||||
|
||||
quad_lookup_t quad_lookup_create(unsigned int rate,
|
||||
unsigned int order,
|
||||
const unsigned int *table) {
|
||||
quad_lookup_t quads;
|
||||
|
||||
quads.keys = malloc(sizeof(unsigned int) * (1 << (order - 2)));
|
||||
quads.outputs = calloc((1 << (rate * 4)), sizeof(unsigned int));
|
||||
unsigned int *inv_outputs = calloc((1 << (rate * 4)), sizeof(unsigned int));
|
||||
unsigned int output_counter = 1;
|
||||
// for every (even-numbered) shift register state, find the concatenated output of the state
|
||||
// and the subsequent state that follows it (low bit set). then, check to see if this
|
||||
// concatenated output has a unique key assigned to it already. if not, give it a key.
|
||||
// if it does, retrieve the key. assign this key to the shift register state.
|
||||
for (unsigned int i = 0; i < (1 << (order - 2)); i++) {
|
||||
// first get the concatenated quad of outputs
|
||||
unsigned int out = table[i * 4 + 3];
|
||||
out <<= rate;
|
||||
out |= table[i * 4 + 2];
|
||||
out <<= rate;
|
||||
out |= table[i * 4 + 1];
|
||||
out <<= rate;
|
||||
out |= table[i * 4];
|
||||
|
||||
// does this concatenated output exist in the outputs table yet?
|
||||
if (!inv_outputs[out]) {
|
||||
// doesn't exist, allocate a new key
|
||||
inv_outputs[out] = output_counter;
|
||||
quads.outputs[output_counter] = out;
|
||||
output_counter++;
|
||||
}
|
||||
// set the opaque key for the ith shift register state to the concatenated output entry
|
||||
quads.keys[i] = inv_outputs[out];
|
||||
}
|
||||
quads.outputs_len = output_counter;
|
||||
quads.output_mask = (1 << (rate)) - 1;
|
||||
quads.output_width = rate;
|
||||
quads.distances = calloc(quads.outputs_len, sizeof(distance_quad_t));
|
||||
free(inv_outputs);
|
||||
return quads;
|
||||
}
|
||||
|
||||
// Release the three heap arrays owned by a quad lookup (keys, outputs,
// distances). The struct is passed by value, so the caller's copy still holds
// the now-dangling pointers afterwards.
void quad_lookup_destroy(quad_lookup_t quads) {
|
||||
free(quads.keys);
|
||||
free(quads.outputs);
|
||||
free(quads.distances);
|
||||
}
|
||||
|
||||
// Refresh the per-key packed distances for the current time slice.
// For every assigned key (keys start at 1) the stored concatenated output is
// split into its four `output_width`-bit fields; the distance of each field is
// packed into one 16-bit lane of quads.distances[key], field 0 in the lowest
// lane.
void quad_lookup_fill_distance(quad_lookup_t quads, distance_t *distances) {
    for (unsigned int i = 1; i < quads.outputs_len; i += 1) {
        output_quad_t concat_out = quads.outputs[i];
        unsigned int i_0 = concat_out & quads.output_mask;
        concat_out >>= quads.output_width;
        unsigned int i_1 = concat_out & quads.output_mask;
        concat_out >>= quads.output_width;
        unsigned int i_2 = concat_out & quads.output_mask;
        concat_out >>= quads.output_width;
        unsigned int i_3 = concat_out;

        // every term is widened to 64 bits before shifting: left as an int, the
        // `<< 16` term can set bit 31 and would then sign-extend into lanes 2
        // and 3 when combined with the 64-bit terms
        quads.distances[i] = ((uint64_t)distances[i_3] << 48) |
                             ((uint64_t)distances[i_2] << 32) |
                             ((uint64_t)distances[i_1] << 16) |
                             (uint64_t)distances[i_0];
    }
}
|
||||
|
||||
// Linear search for `out` among outputs[1 .. num_keys - 1].
// Returns the index of the first match, or 0 when there is none (slot 0 is
// never examined, so 0 doubles as "not found").
distance_oct_key_t oct_lookup_find_key(output_oct_t *outputs, output_oct_t out, size_t num_keys) {
    size_t candidate = 1;
    while (candidate < num_keys) {
        if (outputs[candidate] == out) {
            return candidate;
        }
        candidate++;
    }
    return 0;
}
|
||||
|
||||
oct_lookup_t oct_lookup_create(unsigned int rate,
|
||||
unsigned int order,
|
||||
const unsigned int *table) {
|
||||
oct_lookup_t octs;
|
||||
|
||||
octs.keys = malloc((1 << (order - 3)) * sizeof(distance_oct_key_t));
|
||||
octs.outputs = malloc(((output_oct_t)2 << rate) * sizeof(uint64_t));
|
||||
output_oct_t *short_outs = calloc(((output_oct_t)2 << rate), sizeof(output_oct_t));
|
||||
size_t outputs_len = 2 << rate;
|
||||
unsigned int output_counter = 1;
|
||||
// for every (even-numbered) shift register state, find the concatenated output of the state
|
||||
// and the subsequent state that follows it (low bit set). then, check to see if this
|
||||
// concatenated output has a unique key assigned to it already. if not, give it a key.
|
||||
// if it does, retrieve the key. assign this key to the shift register state.
|
||||
for (shift_register_t i = 0; i < (1 << (order - 3)); i++) {
|
||||
// first get the concatenated oct of outputs
|
||||
output_oct_t out = table[i * 8 + 7];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 6];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 5];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 4];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 3];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 2];
|
||||
out <<= rate;
|
||||
out |= table[i * 8 + 1];
|
||||
out <<= rate;
|
||||
out |= table[i * 8];
|
||||
|
||||
distance_oct_key_t key = oct_lookup_find_key(short_outs, out, output_counter);
|
||||
// does this concatenated output exist in the outputs table yet?
|
||||
if (!key) {
|
||||
// doesn't exist, allocate a new key
|
||||
// now build it in expanded form
|
||||
output_oct_t expanded_out = table[i * 8 + 7];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 6];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 5];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 4];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 3];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 2];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8 + 1];
|
||||
expanded_out <<= 8;
|
||||
expanded_out |= table[i * 8];
|
||||
|
||||
if (output_counter == outputs_len) {
|
||||
octs.outputs = realloc(octs.outputs, outputs_len * 2 * sizeof(output_oct_t));
|
||||
short_outs = realloc(short_outs, outputs_len * 2 * sizeof(output_oct_t));
|
||||
outputs_len *= 2;
|
||||
}
|
||||
short_outs[output_counter] = out;
|
||||
octs.outputs[output_counter] = expanded_out;
|
||||
key = output_counter;
|
||||
output_counter++;
|
||||
}
|
||||
// set the opaque key for the ith shift register state to the concatenated output entry
|
||||
// we multiply the key by 2 since the distances are strided by 2
|
||||
octs.keys[i] = key * 2;
|
||||
}
|
||||
free(short_outs);
|
||||
octs.outputs_len = output_counter;
|
||||
octs.output_mask = (1 << (rate)) - 1;
|
||||
octs.output_width = rate;
|
||||
octs.distances = malloc(octs.outputs_len * 2 * sizeof(uint64_t));
|
||||
return octs;
|
||||
}
|
||||
|
||||
// Release the three heap arrays owned by an oct lookup (keys, outputs,
// distances). The struct is passed by value, so the caller's copy still holds
// the now-dangling pointers afterwards.
void oct_lookup_destroy(oct_lookup_t octs) {
|
||||
free(octs.keys);
|
||||
free(octs.outputs);
|
||||
free(octs.distances);
|
||||
}
|
||||
|
||||
// WIP: sse approach to filling the distance table
|
||||
/*
|
||||
void oct_lookup_fill_distance_sse(oct_lookup_t octs, distance_t *distances) {
|
||||
distance_pair_t *distance_pair = (distance_pair_t*)octs.distances;
|
||||
__v4si index_shuffle_mask = (__v4si){0xffffff00, 0xffffff01, 0xffffff02, 0xffffff03};
|
||||
__m256i dist_shuffle_mask = (__m256i){0x01000504, 0x09080d0c, 0xffffffff, 0xffffffff,
|
||||
0x01000504, 0x09080d0c, 0xffffffff, 0xffffffff};
|
||||
const int dist_permute_mask = 0x0c;
|
||||
for (unsigned int i = 1; i < octs.outputs_len; i += 2) {
|
||||
// big heaping todo vvv
|
||||
// a) we want 16 bit distances GATHERed, not 32 bit
|
||||
// b) we need to load 8 of those distances, not 4
|
||||
__v4si short_concat_index = _mm_loadl_epi64(octs.outputs + 2*i);
|
||||
__v4si short_concat_index0 = _mm_loadl_epi64(octs.outputs + 2*i + 1);
|
||||
__m256i concat_index = _mm256_cvtepu8_epi32(short_concat_index);
|
||||
__m256i concat_index0 = _mm256_cvtepu8_epi32(short_concat_index0);
|
||||
__m256i dist = _mm256_i32gather_epi32(distances, concat_index, sizeof(distance_t));
|
||||
__m256i dist0 = _mm256_i32gather_epi32(distances, concat_index0, sizeof(distance_t));
|
||||
dist = _mm256_shuffle_epi8(dist, dist_shuffle_mask);
|
||||
dist0 = _mm256_shuffle_epi8(dist0, dist_shuffle_mask);
|
||||
dist = __builtin_shufflevector(dist, dist, 0, 5, 0, 0);
|
||||
dist0 = __builtin_shufflevector(dist0, dist0, 0, 5, 0, 0);
|
||||
__v4si packed_dist = _mm256_castsi256_si128(dist);
|
||||
_mm_store_si128(distance_pair + 8 * i, packed_dist);
|
||||
__v4si packed_dist0 = _mm256_castsi256_si128(dist0);
|
||||
_mm_store_si128(distance_pair + 8 * i + 4, packed_dist0);
|
||||
}
|
||||
}
|
||||
*/
|
255
core/libcorrect/src/fec_shim.c
Normal file
255
core/libcorrect/src/fec_shim.c
Normal file
@ -0,0 +1,255 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fec_shim.h"
|
||||
|
||||
// State behind the opaque handle returned by init_rs_char.
typedef struct {
|
||||
// codec instance obtained from correct_reed_solomon_create
correct_reed_solomon *rs;
|
||||
// payload bytes per block: block_length - num_roots
unsigned int msg_length;
|
||||
// total bytes per block: 255 - pad
unsigned int block_length;
|
||||
// number of parity bytes (number_roots passed to init_rs_char)
unsigned int num_roots;
|
||||
// scratch buffer of block_length bytes that receives the encoder output
uint8_t *msg_out;
|
||||
// pad value passed to init_rs_char; subtracted from erasure locations
unsigned int pad;
|
||||
// scratch buffer of num_roots bytes holding converted erasure locations
uint8_t *erasures;
|
||||
} reed_solomon_shim;
|
||||
|
||||
void *init_rs_char(int symbol_size, int primitive_polynomial,
|
||||
int first_consecutive_root, int root_gap, int number_roots,
|
||||
unsigned int pad) {
|
||||
if (symbol_size != 8) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
reed_solomon_shim *shim = malloc(sizeof(reed_solomon_shim));
|
||||
|
||||
shim->pad = pad;
|
||||
shim->block_length = 255 - pad;
|
||||
shim->num_roots = number_roots;
|
||||
shim->msg_length = shim->block_length - number_roots;
|
||||
shim->rs = correct_reed_solomon_create(primitive_polynomial,
|
||||
first_consecutive_root, root_gap, number_roots);
|
||||
shim->msg_out = malloc(shim->block_length);
|
||||
shim->erasures = malloc(number_roots);
|
||||
|
||||
return shim;
|
||||
}
|
||||
|
||||
void free_rs_char(void *rs) {
|
||||
reed_solomon_shim *shim = (reed_solomon_shim *)rs;
|
||||
correct_reed_solomon_destroy(shim->rs);
|
||||
free(shim->msg_out);
|
||||
free(shim->erasures);
|
||||
free(shim);
|
||||
}
|
||||
|
||||
void encode_rs_char(void *rs, const unsigned char *msg, unsigned char *parity) {
|
||||
reed_solomon_shim *shim = (reed_solomon_shim *)rs;
|
||||
correct_reed_solomon_encode(shim->rs, msg, shim->msg_length, shim->msg_out);
|
||||
memcpy(parity, shim->msg_out + shim->msg_length, shim->num_roots);
|
||||
}
|
||||
|
||||
void decode_rs_char(void *rs, unsigned char *block, int *erasure_locations,
|
||||
int num_erasures) {
|
||||
reed_solomon_shim *shim = (reed_solomon_shim *)rs;
|
||||
for (int i = 0; i < num_erasures; i++) {
|
||||
shim->erasures[i] = (uint8_t)(erasure_locations[i]) - shim->pad;
|
||||
}
|
||||
correct_reed_solomon_decode_with_erasures(shim->rs, block, shim->block_length,
|
||||
shim->erasures, num_erasures,
|
||||
block);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
correct_convolutional *conv;
|
||||
unsigned int rate;
|
||||
unsigned int order;
|
||||
uint8_t *buf;
|
||||
size_t buf_len;
|
||||
uint8_t *read_iter;
|
||||
uint8_t *write_iter;
|
||||
} convolutional_shim;
|
||||
|
||||
static correct_convolutional_polynomial_t r12k7[] = {V27POLYA, V27POLYB};
|
||||
|
||||
static correct_convolutional_polynomial_t r12k9[] = {V29POLYA, V29POLYB};
|
||||
|
||||
static correct_convolutional_polynomial_t r13k9[] = {V39POLYA, V39POLYB,
|
||||
V39POLYC};
|
||||
|
||||
static correct_convolutional_polynomial_t r16k15[] = {
|
||||
V615POLYA, V615POLYB, V615POLYC, V615POLYD, V615POLYE, V615POLYF};
|
||||
|
||||
/* Common methods */
|
||||
static void *create_viterbi(unsigned int num_decoded_bits, unsigned int rate,
|
||||
unsigned int order,
|
||||
correct_convolutional_polynomial_t *poly) {
|
||||
convolutional_shim *shim = malloc(sizeof(convolutional_shim));
|
||||
|
||||
size_t num_decoded_bytes = (num_decoded_bits % 8)
|
||||
? (num_decoded_bits / 8 + 1)
|
||||
: num_decoded_bits / 8;
|
||||
|
||||
shim->rate = rate;
|
||||
shim->order = order;
|
||||
shim->buf = malloc(num_decoded_bytes);
|
||||
shim->buf_len = num_decoded_bytes;
|
||||
shim->conv = correct_convolutional_create(rate, order, poly);
|
||||
shim->read_iter = shim->buf;
|
||||
shim->write_iter = shim->buf;
|
||||
|
||||
return shim;
|
||||
}
|
||||
|
||||
static void delete_viterbi(void *vit) {
|
||||
convolutional_shim *shim = (convolutional_shim *)vit;
|
||||
free(shim->buf);
|
||||
correct_convolutional_destroy(shim->conv);
|
||||
free(shim);
|
||||
}
|
||||
|
||||
static void init_viterbi(void *vit) {
|
||||
convolutional_shim *shim = (convolutional_shim *)vit;
|
||||
shim->read_iter = shim->buf;
|
||||
shim->write_iter = shim->buf;
|
||||
}
|
||||
|
||||
static void update_viterbi_blk(void *vit, const unsigned char *encoded_soft,
|
||||
unsigned int num_encoded_groups) {
|
||||
convolutional_shim *shim = (convolutional_shim *)vit;
|
||||
|
||||
// don't overwrite our buffer
|
||||
size_t rem = (shim->buf + shim->buf_len) - shim->write_iter;
|
||||
size_t rem_bits = 8 * rem;
|
||||
// this math isn't very clear
|
||||
// here we sort of do the opposite of what liquid-dsp does
|
||||
size_t n_write_bits = num_encoded_groups - (shim->order - 1);
|
||||
if (n_write_bits > rem_bits) {
|
||||
size_t reduction = n_write_bits - rem_bits;
|
||||
num_encoded_groups -= reduction;
|
||||
n_write_bits -= reduction;
|
||||
}
|
||||
|
||||
// what if n_write_bits isn't a multiple of 8?
|
||||
// libcorrect can't start and stop at arbitrary indices...
|
||||
correct_convolutional_decode_soft(
|
||||
shim->conv, encoded_soft, num_encoded_groups * shim->rate, shim->write_iter);
|
||||
shim->write_iter += n_write_bits / 8;
|
||||
}
|
||||
|
||||
static void chainback_viterbi(void *vit, unsigned char *decoded,
|
||||
unsigned int num_decoded_bits) {
|
||||
convolutional_shim *shim = (convolutional_shim *)vit;
|
||||
|
||||
// num_decoded_bits not a multiple of 8?
|
||||
// this is a similar problem to update_viterbi_blk
|
||||
// although here we could actually resolve a non-multiple of 8
|
||||
size_t rem = shim->write_iter - shim->read_iter;
|
||||
size_t rem_bits = 8 * rem;
|
||||
|
||||
if (num_decoded_bits > rem_bits) {
|
||||
num_decoded_bits = rem_bits;
|
||||
}
|
||||
|
||||
size_t num_decoded_bytes = (num_decoded_bits % 8)
|
||||
? (num_decoded_bits / 8 + 1)
|
||||
: num_decoded_bits / 8;
|
||||
memcpy(decoded, shim->read_iter, num_decoded_bytes);
|
||||
|
||||
shim->read_iter += num_decoded_bytes;
|
||||
}
|
||||
|
||||
/* Rate 1/2, k = 7 */
|
||||
void *create_viterbi27(int num_decoded_bits) {
|
||||
return create_viterbi(num_decoded_bits, 2, 7, r12k7);
|
||||
}
|
||||
|
||||
/* Release a handle obtained from create_viterbi27(). */
void delete_viterbi27(void *vit) {
    delete_viterbi(vit);
}
|
||||
|
||||
/* Reset the handle's buffer cursors; the second argument is ignored. Always returns 0. */
int init_viterbi27(void *vit, int _) {
    (void)_;
    init_viterbi(vit);
    return 0;
}
|
||||
|
||||
/* Feed num_encoded_groups groups of soft symbols to the decoder. Always returns 0. */
int update_viterbi27_blk(void *vit, unsigned char *encoded_soft,
                         int num_encoded_groups) {
    const unsigned int groups = (unsigned int)num_encoded_groups;
    update_viterbi_blk(vit, encoded_soft, groups);
    return 0;
}
|
||||
|
||||
/* Copy out up to num_decoded_bits decoded bits; the last argument is ignored. Always returns 0. */
int chainback_viterbi27(void *vit, unsigned char *decoded,
                        unsigned int num_decoded_bits, unsigned int _) {
    (void)_;
    chainback_viterbi(vit, decoded, num_decoded_bits);
    return 0;
}
|
||||
|
||||
/* Rate 1/2, k = 9 */
|
||||
void *create_viterbi29(int num_decoded_bits) {
|
||||
return create_viterbi(num_decoded_bits, 2, 9, r12k9);
|
||||
}
|
||||
|
||||
/* Release a handle obtained from create_viterbi29(). */
void delete_viterbi29(void *vit) {
    delete_viterbi(vit);
}
|
||||
|
||||
/* Reset the handle's buffer cursors; the second argument is ignored. Always returns 0. */
int init_viterbi29(void *vit, int _) {
    (void)_;
    init_viterbi(vit);
    return 0;
}
|
||||
|
||||
/* Feed num_encoded_groups groups of soft symbols to the decoder. Always returns 0. */
int update_viterbi29_blk(void *vit, unsigned char *encoded_soft,
                         int num_encoded_groups) {
    const unsigned int groups = (unsigned int)num_encoded_groups;
    update_viterbi_blk(vit, encoded_soft, groups);
    return 0;
}
|
||||
|
||||
/* Copy out up to num_decoded_bits decoded bits; the last argument is ignored. Always returns 0. */
int chainback_viterbi29(void *vit, unsigned char *decoded,
                        unsigned int num_decoded_bits, unsigned int _) {
    (void)_;
    chainback_viterbi(vit, decoded, num_decoded_bits);
    return 0;
}
|
||||
|
||||
/* Rate 1/3, k = 9 */
|
||||
void *create_viterbi39(int num_decoded_bits) {
|
||||
return create_viterbi(num_decoded_bits, 3, 9, r13k9);
|
||||
}
|
||||
|
||||
/* Release a handle obtained from create_viterbi39(). */
void delete_viterbi39(void *vit) {
    delete_viterbi(vit);
}
|
||||
|
||||
/* Reset the handle's buffer cursors; the second argument is ignored. Always returns 0. */
int init_viterbi39(void *vit, int _) {
    (void)_;
    init_viterbi(vit);
    return 0;
}
|
||||
|
||||
/* Feed num_encoded_groups groups of soft symbols to the decoder. Always returns 0. */
int update_viterbi39_blk(void *vit, unsigned char *encoded_soft,
                         int num_encoded_groups) {
    const unsigned int groups = (unsigned int)num_encoded_groups;
    update_viterbi_blk(vit, encoded_soft, groups);
    return 0;
}
|
||||
|
||||
/* Copy out up to num_decoded_bits decoded bits; the last argument is ignored. Always returns 0. */
int chainback_viterbi39(void *vit, unsigned char *decoded,
                        unsigned int num_decoded_bits, unsigned int _) {
    (void)_;
    chainback_viterbi(vit, decoded, num_decoded_bits);
    return 0;
}
|
||||
|
||||
/* Rate 1/6, k = 15 */
|
||||
void *create_viterbi615(int num_decoded_bits) {
|
||||
return create_viterbi(num_decoded_bits, 6, 15, r16k15);
|
||||
}
|
||||
|
||||
/* Release a handle obtained from create_viterbi615(). */
void delete_viterbi615(void *vit) {
    delete_viterbi(vit);
}
|
||||
|
||||
/* Reset the handle's buffer cursors; the second argument is ignored. Always returns 0. */
int init_viterbi615(void *vit, int _) {
    (void)_;
    init_viterbi(vit);
    return 0;
}
|
||||
|
||||
/* Feed num_encoded_groups groups of soft symbols to the decoder. Always returns 0. */
int update_viterbi615_blk(void *vit, unsigned char *encoded_soft,
                          int num_encoded_groups) {
    const unsigned int groups = (unsigned int)num_encoded_groups;
    update_viterbi_blk(vit, encoded_soft, groups);
    return 0;
}
|
||||
|
||||
/* Copy out up to num_decoded_bits decoded bits; the last argument is ignored. Always returns 0. */
int chainback_viterbi615(void *vit, unsigned char *decoded,
                         unsigned int num_decoded_bits, unsigned int _) {
    (void)_;
    chainback_viterbi(vit, decoded, num_decoded_bits);
    return 0;
}
|
2
core/libcorrect/src/reed-solomon/CMakeLists.txt
Normal file
2
core/libcorrect/src/reed-solomon/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
# Compile the Reed-Solomon sources listed in SRCFILES into the
# correct-reed-solomon OBJECT library.
set(SRCFILES polynomial.c reed-solomon.c encode.c decode.c)
|
||||
add_library(correct-reed-solomon OBJECT ${SRCFILES})
|
508
core/libcorrect/src/reed-solomon/decode.c
Normal file
508
core/libcorrect/src/reed-solomon/decode.c
Normal file
@ -0,0 +1,508 @@
|
||||
#include "correct/reed-solomon/encode.h"
|
||||
|
||||
// calculate all syndromes of the received polynomial at the roots of the generator
|
||||
// because we're evaluating at the roots of the generator, and because the transmitted
|
||||
// polynomial was made to be a product of the generator, we know that the transmitted
|
||||
// polynomial is 0 at these roots
|
||||
// any nonzero syndromes we find here are the values of the error polynomial evaluated
|
||||
// at these roots, so these values give us a window into the error polynomial. if
|
||||
// these syndromes are all zero, then we can conclude the error polynomial is also
|
||||
// zero. if they're nonzero, then we know our message received an error in transit.
|
||||
// returns true if syndromes are all zero
|
||||
static bool reed_solomon_find_syndromes(field_t field, polynomial_t msgpoly, field_logarithm_t **generator_root_exp,
|
||||
field_element_t *syndromes, size_t min_distance) {
|
||||
bool all_zero = true;
|
||||
memset(syndromes, 0, min_distance * sizeof(field_element_t));
|
||||
for (unsigned int i = 0; i < min_distance; i++) {
|
||||
// profiling reveals that this function takes about 50% of the cpu time of
|
||||
// decoding. so, in order to speed it up a little, we precompute and save
|
||||
// the successive powers of the roots of the generator, which are
|
||||
// located in generator_root_exp
|
||||
field_element_t eval = polynomial_eval_lut(field, msgpoly, generator_root_exp[i]);
|
||||
if (eval) {
|
||||
all_zero = false;
|
||||
}
|
||||
syndromes[i] = eval;
|
||||
}
|
||||
return all_zero;
|
||||
}
|
||||
|
||||
// Berlekamp-Massey algorithm to find LFSR that describes syndromes
|
||||
// returns number of errors and writes the error locator polynomial to rs->error_locator
|
||||
static unsigned int reed_solomon_find_error_locator(correct_reed_solomon *rs, size_t num_erasures) {
|
||||
unsigned int numerrors = 0;
|
||||
|
||||
memset(rs->error_locator.coeff, 0, (rs->min_distance + 1) * sizeof(field_element_t));
|
||||
|
||||
// initialize to f(x) = 1
|
||||
rs->error_locator.coeff[0] = 1;
|
||||
rs->error_locator.order = 0;
|
||||
|
||||
memcpy(rs->last_error_locator.coeff, rs->error_locator.coeff, (rs->min_distance + 1) * sizeof(field_element_t));
|
||||
rs->last_error_locator.order = rs->error_locator.order;
|
||||
|
||||
field_element_t discrepancy;
|
||||
field_element_t last_discrepancy = 1;
|
||||
unsigned int delay_length = 1;
|
||||
|
||||
for (unsigned int i = rs->error_locator.order; i < rs->min_distance - num_erasures; i++) {
|
||||
discrepancy = rs->syndromes[i];
|
||||
for (unsigned int j = 1; j <= numerrors; j++) {
|
||||
discrepancy = field_add(rs->field, discrepancy,
|
||||
field_mul(rs->field, rs->error_locator.coeff[j], rs->syndromes[i - j]));
|
||||
}
|
||||
|
||||
if (!discrepancy) {
|
||||
// our existing LFSR describes the new syndrome as well
|
||||
// leave it as-is but update the number of delay elements
|
||||
// so that if a discrepancy occurs later we can eliminate it
|
||||
delay_length++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (2 * numerrors <= i) {
|
||||
// there's a discrepancy, but we still have room for more taps
|
||||
// lengthen LFSR by one tap and set weight to eliminate discrepancy
|
||||
|
||||
// shift the last locator by the delay length, multiply by discrepancy,
|
||||
// and divide by the last discrepancy
|
||||
// we move down because we're shifting up, and this prevents overwriting
|
||||
for (int j = rs->last_error_locator.order; j >= 0; j--) {
|
||||
// the bounds here will be ok since we have a headroom of numerrors
|
||||
rs->last_error_locator.coeff[j + delay_length] = field_div(
|
||||
rs->field, field_mul(rs->field, rs->last_error_locator.coeff[j], discrepancy), last_discrepancy);
|
||||
}
|
||||
for (int j = delay_length - 1; j >= 0; j--) {
|
||||
rs->last_error_locator.coeff[j] = 0;
|
||||
}
|
||||
|
||||
// locator = locator - last_locator
|
||||
// we will also update last_locator to be locator before this loop takes place
|
||||
field_element_t temp;
|
||||
for (int j = 0; j <= (rs->last_error_locator.order + delay_length); j++) {
|
||||
temp = rs->error_locator.coeff[j];
|
||||
rs->error_locator.coeff[j] =
|
||||
field_add(rs->field, rs->error_locator.coeff[j], rs->last_error_locator.coeff[j]);
|
||||
rs->last_error_locator.coeff[j] = temp;
|
||||
}
|
||||
unsigned int temp_order = rs->error_locator.order;
|
||||
rs->error_locator.order = rs->last_error_locator.order + delay_length;
|
||||
rs->last_error_locator.order = temp_order;
|
||||
|
||||
// now last_locator is locator before we started,
|
||||
// and locator is (locator - (discrepancy/last_discrepancy) * x^(delay_length) * last_locator)
|
||||
|
||||
numerrors = i + 1 - numerrors;
|
||||
last_discrepancy = discrepancy;
|
||||
delay_length = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// no more taps
|
||||
// unlike the previous case, we are preserving last locator,
|
||||
// but we'll update locator as before
|
||||
// we're basically flattening the two loops from the previous case because
|
||||
// we no longer need to update last_locator
|
||||
for (int j = rs->last_error_locator.order; j >= 0; j--) {
|
||||
rs->error_locator.coeff[j + delay_length] =
|
||||
field_add(rs->field, rs->error_locator.coeff[j + delay_length],
|
||||
field_div(rs->field, field_mul(rs->field, rs->last_error_locator.coeff[j], discrepancy),
|
||||
last_discrepancy));
|
||||
}
|
||||
rs->error_locator.order = (rs->last_error_locator.order + delay_length > rs->error_locator.order)
|
||||
? rs->last_error_locator.order + delay_length
|
||||
: rs->error_locator.order;
|
||||
delay_length++;
|
||||
}
|
||||
return rs->error_locator.order;
|
||||
}
|
||||
|
||||
// find the roots of the error locator polynomial
|
||||
// Chien search
|
||||
bool reed_solomon_factorize_error_locator(field_t field, unsigned int num_skip, polynomial_t locator_log, field_element_t *roots,
|
||||
field_logarithm_t **element_exp) {
|
||||
// normally it'd be tricky to find all the roots
|
||||
// but, the finite field is awfully finite...
|
||||
// just brute force search across every field element
|
||||
unsigned int root = num_skip;
|
||||
memset(roots + num_skip, 0, (locator_log.order) * sizeof(field_element_t));
|
||||
for (field_operation_t i = 0; i < 256; i++) {
|
||||
// we make two optimizations here to help this search go faster
|
||||
// a) we have precomputed the first successive powers of every single element
|
||||
// in the field. we need at most n powers, where n is the largest possible
|
||||
// degree of the error locator
|
||||
// b) we have precomputed the error locator polynomial in log form, which
|
||||
// helps reduce some lookups that would be done here
|
||||
if (!polynomial_eval_log_lut(field, locator_log, element_exp[i])) {
|
||||
roots[root] = (field_element_t)i;
|
||||
root++;
|
||||
}
|
||||
}
|
||||
// this is where we find out if we are have too many errors to recover from
|
||||
// berlekamp-massey may have built an error locator that has 0 discrepancy
|
||||
// on the syndromes but doesn't have enough roots
|
||||
return root == locator_log.order + num_skip;
|
||||
}
|
||||
|
||||
// use error locator and syndromes to find the error evaluator polynomial
|
||||
void reed_solomon_find_error_evaluator(field_t field, polynomial_t locator, polynomial_t syndromes,
|
||||
polynomial_t error_evaluator) {
|
||||
// the error evaluator, omega(x), is S(x)*Lamba(x) mod x^(2t)
|
||||
// where S(x) is a polynomial constructed from the syndromes
|
||||
// S(1) + S(2)*x + ... + S(2t)*x(2t - 1)
|
||||
// and Lambda(x) is the error locator
|
||||
// the modulo is implicit here -- we have limited the max length of error_evaluator,
|
||||
// which polynomial_mul will interpret to mean that it should not compute
|
||||
// powers larger than that, which is the same as performing mod x^(2t)
|
||||
polynomial_mul(field, locator, syndromes, error_evaluator);
|
||||
}
|
||||
|
||||
// use error locator, error roots and syndromes to find the error values
|
||||
// that is, the elements in the finite field which can be added to the received
|
||||
// polynomial at the locations of the error roots in order to produce the
|
||||
// transmitted polynomial
|
||||
// forney algorithm
|
||||
void reed_solomon_find_error_values(correct_reed_solomon *rs) {
|
||||
// error value e(j) = -(X(j)^(1-c) * omega(X(j)^-1))/(lambda'(X(j)^-1))
|
||||
// where X(j)^-1 is a root of the error locator, omega(X) is the error evaluator,
|
||||
// lambda'(X) is the first formal derivative of the error locator,
|
||||
// and c is the first consecutive root of the generator used in encoding
|
||||
|
||||
// first find omega(X), the error evaluator
|
||||
// we generate S(x), the polynomial constructed from the roots of the syndromes
|
||||
// this is *not* the polynomial constructed by expanding the products of roots
|
||||
// S(x) = S(1) + S(2)*x + ... + S(2t)*x(2t - 1)
|
||||
polynomial_t syndrome_poly;
|
||||
syndrome_poly.order = rs->min_distance - 1;
|
||||
syndrome_poly.coeff = rs->syndromes;
|
||||
memset(rs->error_evaluator.coeff, 0, (rs->error_evaluator.order + 1) * sizeof(field_element_t));
|
||||
reed_solomon_find_error_evaluator(rs->field, rs->error_locator, syndrome_poly, rs->error_evaluator);
|
||||
|
||||
// now find lambda'(X)
|
||||
rs->error_locator_derivative.order = rs->error_locator.order - 1;
|
||||
polynomial_formal_derivative(rs->field, rs->error_locator, rs->error_locator_derivative);
|
||||
|
||||
// calculate each e(j)
|
||||
for (unsigned int i = 0; i < rs->error_locator.order; i++) {
|
||||
if (rs->error_roots[i] == 0) {
|
||||
continue;
|
||||
}
|
||||
rs->error_vals[i] = field_mul(
|
||||
rs->field, field_pow(rs->field, rs->error_roots[i], rs->first_consecutive_root - 1),
|
||||
field_div(
|
||||
rs->field, polynomial_eval_lut(rs->field, rs->error_evaluator, rs->element_exp[rs->error_roots[i]]),
|
||||
polynomial_eval_lut(rs->field, rs->error_locator_derivative, rs->element_exp[rs->error_roots[i]])));
|
||||
}
|
||||
}
|
||||
|
||||
void reed_solomon_find_error_locations(field_t field, field_logarithm_t generator_root_gap,
|
||||
field_element_t *error_roots, field_logarithm_t *error_locations,
|
||||
unsigned int num_errors, unsigned int num_skip) {
|
||||
for (unsigned int i = 0; i < num_errors; i++) {
|
||||
// the error roots are the reciprocals of the error locations, so div 1 by them
|
||||
|
||||
// we do mod 255 here because the log table aliases at index 1
|
||||
// the log of 1 is both 0 and 255 (alpha^255 = alpha^0 = 1)
|
||||
// for most uses it makes sense to have log(1) = 255, but in this case
|
||||
// we're interested in a byte index, and the 255th index is not even valid
|
||||
// just wrap it back to 0
|
||||
|
||||
if (error_roots[i] == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
field_operation_t loc = field_div(field, 1, error_roots[i]);
|
||||
for (field_operation_t j = 0; j < 256; j++) {
|
||||
if (field_pow(field, j, generator_root_gap) == loc) {
|
||||
error_locations[i] = field.log[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// erasure method -- take given locations and convert to roots
|
||||
// this is the inverse of reed_solomon_find_error_locations
|
||||
static void reed_solomon_find_error_roots_from_locations(field_t field, field_logarithm_t generator_root_gap,
|
||||
const field_logarithm_t *error_locations,
|
||||
field_element_t *error_roots, unsigned int num_errors) {
|
||||
for (unsigned int i = 0; i < num_errors; i++) {
|
||||
field_element_t loc = field_pow(field, field.exp[error_locations[i]], generator_root_gap);
|
||||
// field_element_t loc = field.exp[error_locations[i]];
|
||||
error_roots[i] = field_div(field, 1, loc);
|
||||
// error_roots[i] = loc;
|
||||
}
|
||||
}
|
||||
|
||||
// erasure method -- given the roots of the error locator, create the polynomial
|
||||
static polynomial_t reed_solomon_find_error_locator_from_roots(field_t field, unsigned int num_errors,
|
||||
field_element_t *error_roots,
|
||||
polynomial_t error_locator,
|
||||
polynomial_t *scratch) {
|
||||
// multiply out roots to build the error locator polynomial
|
||||
return polynomial_init_from_roots(field, num_errors, error_roots, error_locator, scratch);
|
||||
}
|
||||
|
||||
// erasure method
|
||||
static void reed_solomon_find_modified_syndromes(correct_reed_solomon *rs, field_element_t *syndromes, polynomial_t error_locator, field_element_t *modified_syndromes) {
|
||||
polynomial_t syndrome_poly;
|
||||
syndrome_poly.order = rs->min_distance - 1;
|
||||
syndrome_poly.coeff = syndromes;
|
||||
|
||||
polynomial_t modified_syndrome_poly;
|
||||
modified_syndrome_poly.order = rs->min_distance - 1;
|
||||
modified_syndrome_poly.coeff = modified_syndromes;
|
||||
|
||||
polynomial_mul(rs->field, error_locator, syndrome_poly, modified_syndrome_poly);
|
||||
}
|
||||
|
||||
// Allocate all decoder-side state of rs: scratch arrays, working polynomials
// and the two power lookup tables. Marks rs as initialized for decoding.
void correct_reed_solomon_decoder_create(correct_reed_solomon *rs) {
    rs->has_init_decode = true;

    // flat scratch arrays
    rs->syndromes = calloc(rs->min_distance, sizeof(field_element_t));
    rs->modified_syndromes = calloc(2 * rs->min_distance, sizeof(field_element_t));
    rs->error_roots = calloc(2 * rs->min_distance, sizeof(field_element_t));
    rs->error_vals = malloc(rs->min_distance * sizeof(field_element_t));
    rs->error_locations = malloc(rs->min_distance * sizeof(field_logarithm_t));

    // working polynomials
    rs->received_polynomial = polynomial_create(rs->block_length - 1);
    rs->error_locator = polynomial_create(rs->min_distance);
    rs->error_locator_log = polynomial_create(rs->min_distance);
    rs->erasure_locator = polynomial_create(rs->min_distance);
    rs->last_error_locator = polynomial_create(rs->min_distance);
    rs->error_evaluator = polynomial_create(rs->min_distance - 1);
    rs->error_locator_derivative = polynomial_create(rs->min_distance - 1);

    // First block_length powers of every generator root. Syndrome calculation
    // needs them on every call, so caching avoids recomputation.
    // Cost: min_distance * block_length bytes, e.g. 32 * 255 ~= 8k.
    rs->generator_root_exp = malloc(rs->min_distance * sizeof(field_logarithm_t *));
    for (unsigned int r = 0; r < rs->min_distance; r++) {
        field_logarithm_t *row = malloc(rs->block_length * sizeof(field_logarithm_t));
        rs->generator_root_exp[r] = row;
        polynomial_build_exp_lut(rs->field, rs->generator_roots[r], rs->block_length - 1, row);
    }

    // First min_distance powers of every field element. Chien search needs
    // them anyway and error value calculation reuses them.
    // Cost: 256 * min_distance bytes, 8k for min_distance = 32.
    rs->element_exp = malloc(256 * sizeof(field_logarithm_t *));
    for (field_operation_t e = 0; e < 256; e++) {
        field_logarithm_t *row = malloc(rs->min_distance * sizeof(field_logarithm_t));
        rs->element_exp[e] = row;
        polynomial_build_exp_lut(rs->field, e, rs->min_distance - 1, row);
    }

    rs->init_from_roots_scratch[0] = polynomial_create(rs->min_distance);
    rs->init_from_roots_scratch[1] = polynomial_create(rs->min_distance);
}
|
||||
|
||||
// Decode one Reed-Solomon block without erasure information.
//
// encoded        - encoded_length bytes, message first, parity last
// encoded_length - must lie in [rs->min_distance, rs->block_length]; shorter
//                  blocks are zero-padded internally
// msg            - receives encoded_length - rs->min_distance message bytes
//
// Returns the number of message bytes written, or -1 when the length is out
// of range or the block has too many errors to be corrected.
ssize_t correct_reed_solomon_decode(correct_reed_solomon *rs, const uint8_t *encoded, size_t encoded_length,
                                    uint8_t *msg) {
    if (encoded_length > rs->block_length) {
        return -1;
    }
    // a block shorter than its own parity has no message part; without this
    // check the subtraction below wraps around and the copy loops run wild
    if (encoded_length < rs->min_distance) {
        return -1;
    }

    // the message is the non-remainder part
    size_t msg_length = encoded_length - rs->min_distance;
    // if they handed us a nonfull block, we'll write in 0s
    size_t pad_length = rs->block_length - encoded_length;

    if (!rs->has_init_decode) {
        // initialize rs for decoding
        correct_reed_solomon_decoder_create(rs);
    }

    // The caller's buffer runs from high-order to low-order coefficient
    // (byte 0 is the 254th-order coefficient), the opposite of our
    // polynomials, so flip it while copying and then pad. Final layout:
    // | rem (rs->min_distance) | msg (msg_length) | pad (pad_length) |
    for (unsigned int i = 0; i < encoded_length; i++) {
        rs->received_polynomial.coeff[i] = encoded[encoded_length - (i + 1)];
    }
    for (unsigned int i = 0; i < pad_length; i++) {
        rs->received_polynomial.coeff[i + encoded_length] = 0;
    }

    bool all_zero = reed_solomon_find_syndromes(rs->field, rs->received_polynomial, rs->generator_root_exp,
                                                rs->syndromes, rs->min_distance);

    if (all_zero) {
        // syndromes were all zero, so there was no error in the message
        for (unsigned int i = 0; i < msg_length; i++) {
            msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)];
        }
        return (ssize_t)msg_length;
    }

    unsigned int order = reed_solomon_find_error_locator(rs, 0);
    // XXX fix this vvvv
    rs->error_locator.order = order;

    // Chien search works on the locator in log form. Zero coefficients are
    // stored as log 0; per the original author's note the search skips them,
    // and log(1) is represented as 255 in the table so it does not alias.
    for (unsigned int i = 0; i <= rs->error_locator.order; i++) {
        rs->error_locator_log.coeff[i] = rs->field.log[rs->error_locator.coeff[i]];
    }
    rs->error_locator_log.order = rs->error_locator.order;

    if (!reed_solomon_factorize_error_locator(rs->field, 0, rs->error_locator_log, rs->error_roots, rs->element_exp)) {
        // roots couldn't be found, so there were too many errors to deal with
        return -1;
    }

    reed_solomon_find_error_locations(rs->field, rs->generator_root_gap, rs->error_roots, rs->error_locations,
                                      rs->error_locator.order, 0);

    reed_solomon_find_error_values(rs);

    // apply the corrections
    for (unsigned int i = 0; i < rs->error_locator.order; i++) {
        rs->received_polynomial.coeff[rs->error_locations[i]] =
            field_sub(rs->field, rs->received_polynomial.coeff[rs->error_locations[i]], rs->error_vals[i]);
    }

    // flip back into the caller's byte order
    for (unsigned int i = 0; i < msg_length; i++) {
        msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)];
    }

    return (ssize_t)msg_length;
}
|
||||
|
||||
// Decode one Reed-Solomon block given the positions of known-bad bytes.
//
// encoded           - encoded_length bytes, message first, parity last
// encoded_length    - must lie in [rs->min_distance, rs->block_length]
// erasure_locations - erasure_length byte indices into encoded, each of which
//                     must be < encoded_length
// erasure_length    - at most rs->min_distance; 0 falls back to
//                     correct_reed_solomon_decode
// msg               - receives encoded_length - rs->min_distance message bytes
//
// Returns the number of message bytes written, or -1 on invalid arguments,
// allocation failure, or when the block cannot be corrected.
ssize_t correct_reed_solomon_decode_with_erasures(correct_reed_solomon *rs, const uint8_t *encoded,
                                                  size_t encoded_length, const uint8_t *erasure_locations,
                                                  size_t erasure_length, uint8_t *msg) {
    if (encoded_length > rs->block_length) {
        return -1;
    }
    // a block shorter than its own parity has no message part; without this
    // check the msg_length subtraction below wraps around
    if (encoded_length < rs->min_distance) {
        return -1;
    }

    if (!erasure_length) {
        return correct_reed_solomon_decode(rs, encoded, encoded_length, msg);
    }

    if (erasure_length > rs->min_distance) {
        return -1;
    }

    // an erasure outside the supplied block cannot be remapped to a valid
    // coefficient index
    for (size_t i = 0; i < erasure_length; i++) {
        if (erasure_locations[i] >= encoded_length) {
            return -1;
        }
    }

    // the message is the non-remainder part
    size_t msg_length = encoded_length - rs->min_distance;
    // if they handed us a nonfull block, we'll write in 0s
    size_t pad_length = rs->block_length - encoded_length;

    if (!rs->has_init_decode) {
        // initialize rs for decoding
        correct_reed_solomon_decoder_create(rs);
    }

    // The caller's buffer runs from high-order to low-order coefficient
    // (byte 0 is the 254th-order coefficient), the opposite of our
    // polynomials, so flip it while copying and then pad. Final layout:
    // | rem (rs->min_distance) | msg (msg_length) | pad (pad_length) |
    for (unsigned int i = 0; i < encoded_length; i++) {
        rs->received_polynomial.coeff[i] = encoded[encoded_length - (i + 1)];
    }
    for (unsigned int i = 0; i < pad_length; i++) {
        rs->received_polynomial.coeff[i + encoded_length] = 0;
    }

    for (unsigned int i = 0; i < erasure_length; i++) {
        // remap the coordinates of the erasures into flipped polynomial order
        rs->error_locations[i] = rs->block_length - (erasure_locations[i] + pad_length + 1);
    }

    reed_solomon_find_error_roots_from_locations(rs->field, rs->generator_root_gap, rs->error_locations,
                                                 rs->error_roots, erasure_length);

    rs->erasure_locator =
        reed_solomon_find_error_locator_from_roots(rs->field, erasure_length, rs->error_roots, rs->erasure_locator, rs->init_from_roots_scratch);

    bool all_zero = reed_solomon_find_syndromes(rs->field, rs->received_polynomial, rs->generator_root_exp,
                                                rs->syndromes, rs->min_distance);

    if (all_zero) {
        // syndromes were all zero, so there was no error in the message
        for (unsigned int i = 0; i < msg_length; i++) {
            msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)];
        }
        return (ssize_t)msg_length;
    }

    reed_solomon_find_modified_syndromes(rs, rs->syndromes, rs->erasure_locator, rs->modified_syndromes);

    // the original syndromes are needed again for the error values, so keep
    // a copy while rs->syndromes temporarily holds the modified ones
    field_element_t *syndrome_copy = malloc(rs->min_distance * sizeof(field_element_t));
    if (!syndrome_copy) {
        return -1;
    }
    memcpy(syndrome_copy, rs->syndromes, rs->min_distance * sizeof(field_element_t));

    for (unsigned int i = erasure_length; i < rs->min_distance; i++) {
        rs->syndromes[i - erasure_length] = rs->modified_syndromes[i];
    }

    unsigned int order = reed_solomon_find_error_locator(rs, erasure_length);
    // XXX fix this vvvv
    rs->error_locator.order = order;

    // Chien search works on the locator in log form. Zero coefficients are
    // stored as log 0; per the original author's note the search skips them,
    // and log(1) is represented as 255 in the table so it does not alias.
    for (unsigned int i = 0; i <= rs->error_locator.order; i++) {
        rs->error_locator_log.coeff[i] = rs->field.log[rs->error_locator.coeff[i]];
    }
    rs->error_locator_log.order = rs->error_locator.order;

    // roots of the remaining (non-erasure) errors are appended after the
    // erasure roots already stored in rs->error_roots
    if (!reed_solomon_factorize_error_locator(rs->field, erasure_length, rs->error_locator_log, rs->error_roots, rs->element_exp)) {
        // roots couldn't be found, so there were too many errors to deal with
        free(syndrome_copy);
        return -1;
    }

    // combined locator = erasure locator * error locator; it temporarily
    // replaces rs->error_locator and is swapped back out below
    polynomial_t temp_poly = polynomial_create(rs->error_locator.order + erasure_length);
    if (!temp_poly.coeff) {
        free(syndrome_copy);
        return -1;
    }
    polynomial_mul(rs->field, rs->erasure_locator, rs->error_locator, temp_poly);
    polynomial_t placeholder_poly = rs->error_locator;
    rs->error_locator = temp_poly;

    reed_solomon_find_error_locations(rs->field, rs->generator_root_gap, rs->error_roots, rs->error_locations,
                                      rs->error_locator.order, erasure_length);

    // restore the unmodified syndromes for the error value calculation
    memcpy(rs->syndromes, syndrome_copy, rs->min_distance * sizeof(field_element_t));

    reed_solomon_find_error_values(rs);

    // apply the corrections
    for (unsigned int i = 0; i < rs->error_locator.order; i++) {
        rs->received_polynomial.coeff[rs->error_locations[i]] =
            field_sub(rs->field, rs->received_polynomial.coeff[rs->error_locations[i]], rs->error_vals[i]);
    }

    rs->error_locator = placeholder_poly;

    // flip back into the caller's byte order
    for (unsigned int i = 0; i < msg_length; i++) {
        msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)];
    }

    polynomial_destroy(temp_poly);
    free(syndrome_copy);

    return (ssize_t)msg_length;
}
|
34
core/libcorrect/src/reed-solomon/encode.c
Normal file
34
core/libcorrect/src/reed-solomon/encode.c
Normal file
@ -0,0 +1,34 @@
|
||||
#include "correct/reed-solomon/encode.h"
|
||||
|
||||
// Encode one Reed-Solomon block.
//
// msg holds msg_length payload bytes, highest order first. msg_length may be
// shorter than rs->message_length; the missing leading bytes are treated as
// zeros and are not emitted.
//
// encoded receives the msg_length payload bytes followed by rs->min_distance
// parity bytes, so it must have room for msg_length + rs->min_distance bytes.
//
// Returns the number of bytes written to encoded, or -1 if msg_length is
// larger than rs->message_length.
ssize_t correct_reed_solomon_encode(correct_reed_solomon *rs, const uint8_t *msg, size_t msg_length, uint8_t *encoded) {
    if (msg_length > rs->message_length) {
        return -1;
    }

    size_t pad_length = rs->message_length - msg_length;

    // message goes from high order to low order but libcorrect polynomials go low to high
    // so we reverse on the way in and on the way out
    // we'd have to do a copy anyway so this reversal should be free
    for (size_t i = 0; i < msg_length; i++) {
        rs->encoded_polynomial.coeff[rs->encoded_polynomial.order - (i + pad_length)] = msg[i];
    }

    // zero the virtual padding that sits above the message (highest order coefficients)
    memset(rs->encoded_polynomial.coeff + (rs->encoded_polynomial.order + 1 - pad_length), 0, pad_length);
    // zero the low order coefficients below the message
    memset(rs->encoded_polynomial.coeff, 0, (rs->encoded_polynomial.order + 1 - rs->message_length));

    // the parity bytes are the remainder of the message polynomial mod the generator
    polynomial_mod(rs->field, rs->encoded_polynomial, rs->generator, rs->encoded_remainder);

    // now return byte order to highest order to lowest order
    for (size_t i = 0; i < msg_length; i++) {
        encoded[i] = rs->encoded_polynomial.coeff[rs->encoded_polynomial.order - (i + pad_length)];
    }

    for (size_t i = 0; i < rs->min_distance; i++) {
        encoded[msg_length + i] = rs->encoded_remainder.coeff[rs->min_distance - (i + 1)];
    }

    // report what was actually written: for a shortened message this is less
    // than rs->block_length, and the bytes beyond it in encoded are untouched
    return (ssize_t)(msg_length + rs->min_distance);
}
|
255
core/libcorrect/src/reed-solomon/polynomial.c
Normal file
255
core/libcorrect/src/reed-solomon/polynomial.c
Normal file
@ -0,0 +1,255 @@
|
||||
#include "correct/reed-solomon/polynomial.h"
|
||||
|
||||
polynomial_t polynomial_create(unsigned int order) {
|
||||
polynomial_t polynomial;
|
||||
polynomial.coeff = malloc(sizeof(field_element_t) * (order + 1));
|
||||
polynomial.order = order;
|
||||
return polynomial;
|
||||
}
|
||||
|
||||
// Release the coefficient storage owned by polynomial.
void polynomial_destroy(polynomial_t polynomial) {
    field_element_t *owned_coeff = polynomial.coeff;
    free(owned_coeff);
}
|
||||
|
||||
// if you want a full multiplication, then make res.order = l.order + r.order
|
||||
// but if you just care about a lower order, e.g. mul mod x^i, then you can select
|
||||
// fewer coefficients
|
||||
void polynomial_mul(field_t field, polynomial_t l, polynomial_t r, polynomial_t res) {
|
||||
// perform an element-wise multiplication of two polynomials
|
||||
memset(res.coeff, 0, sizeof(field_element_t) * (res.order + 1));
|
||||
for (unsigned int i = 0; i <= l.order; i++) {
|
||||
if (i > res.order) {
|
||||
continue;
|
||||
}
|
||||
unsigned int j_limit = (r.order > res.order - i) ? res.order - i : r.order;
|
||||
for (unsigned int j = 0; j <= j_limit; j++) {
|
||||
// e.g. alpha^5*x * alpha^37*x^2 --> alpha^42*x^3
|
||||
res.coeff[i + j] = field_add(field, res.coeff[i + j], field_mul(field, l.coeff[i], r.coeff[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the remainder of dividend divided by divisor using polynomial long
// division, writing the result into mod.
//
// mod doubles as scratch space, so mod.order must be >= dividend.order; if it
// is not, the function returns without touching mod. On return the remainder
// occupies the coefficients of mod below divisor.order.
void polynomial_mod(field_t field, polynomial_t dividend, polynomial_t divisor, polynomial_t mod) {
    if (mod.order < dividend.order) {
        // mod.order must be >= dividend.order (scratch space needed)
        // this is an error -- catch it in debug?
        return;
    }

    // initialize remainder as dividend
    memcpy(mod.coeff, dividend.coeff, sizeof(field_element_t) * (dividend.order + 1));

    // XXX make sure divisor[divisor_order] is nonzero
    field_logarithm_t divisor_leading = field.log[divisor.coeff[divisor.order]];

    // long division steps along one order at a time, starting at the highest
    // order and stopping once the current order drops below divisor.order
    // (at that point what is left is the remainder).
    // The counter runs one ahead of the order being processed so that order 0
    // is also visited when the divisor is a constant.
    for (unsigned int i = dividend.order + 1; i-- > divisor.order;) {
        // if leading coefficient of dividend / leading coefficient of divisor is q
        // then the next row of subtraction will be q * divisor
        if (mod.coeff[i] == 0) {
            continue;
        }

        unsigned int q_order = i - divisor.order;
        field_logarithm_t q_coeff = field_div_log(field, field.log[mod.coeff[i]], divisor_leading);

        // now that we've chosen q, multiply the divisor by q and subtract from
        // our remainder. subtracting in GF(2^8) is XOR, just like addition
        for (unsigned int j = 0; j <= divisor.order; j++) {
            if (divisor.coeff[j] == 0) {
                continue;
            }
            // all of the multiplication is shifted up by q_order places
            mod.coeff[j + q_order] = field_add(field, mod.coeff[j + q_order],
                                               field_mul_log_element(field, field.log[divisor.coeff[j]], q_coeff));
        }
    }
}
|
||||
|
||||
// Write the formal derivative of poly into der.
//
// For f(x) = a(n)*x^n + ... + a(1)*x + a(0) the derivative is
// f'(x) = n*a(n)*x^(n-1) + ... + 2*a(2)*x + a(1), where k*a(k) means a(k)
// summed k times in the field, not a field product.
//
// The caller is expected to have set der.order = poly.order - 1.
void polynomial_formal_derivative(field_t field, polynomial_t poly, polynomial_t der) {
    memset(der.coeff, 0, sizeof(field_element_t) * (der.order + 1));

    unsigned int power = 0;
    while (power <= der.order) {
        // the x^power term of der comes from the x^(power + 1) term of poly
        unsigned int source_power = power + 1;
        der.coeff[power] = field_sum(field, poly.coeff[source_power], source_power);
        power++;
    }
}
|
||||
|
||||
// Evaluate poly at the field element val and return the result.
field_element_t polynomial_eval(field_t field, polynomial_t poly, field_element_t val) {
    // at val = 0 only the constant term remains
    if (val == 0) {
        return poly.coeff[0];
    }

    const field_logarithm_t step_log = field.log[val];
    // log of val^0; each pass through the loop multiplies in one more val
    field_logarithm_t power_log = field.log[1];
    field_element_t acc = 0;

    unsigned int term = 0;
    while (term <= poly.order) {
        const field_element_t c = poly.coeff[term];
        if (c != 0) {
            // add coeff * val^term to the running total
            acc = field_add(field, acc, field_mul_log_element(field, field.log[c], power_log));
        }
        // move on to the next power of val
        power_log = field_mul_log(field, power_log, step_log);
        term++;
    }

    return acc;
}
|
||||
|
||||
// Evaluate poly at an element whose successive powers have been precomputed.
//
// val_exp[i] holds the logarithm of the element raised to the i-th power (see
// polynomial_build_exp_lut), which saves recomputing the powers when the same
// element is evaluated many times. A val_exp[0] of 0 is taken to mean the
// element itself is 0, in which case only the constant term is returned.
field_element_t polynomial_eval_lut(field_t field, polynomial_t poly, const field_logarithm_t *val_exp) {
    if (val_exp[0] == 0) {
        return poly.coeff[0];
    }

    field_element_t acc = 0;
    for (unsigned int term = 0; term <= poly.order; term++) {
        const field_element_t c = poly.coeff[term];
        if (c == 0) {
            // zero coefficients contribute nothing
            continue;
        }
        // add coeff * element^term to the running total
        acc = field_add(field, acc, field_mul_log_element(field, field.log[c], val_exp[term]));
    }
    return acc;
}
|
||||
|
||||
// Evaluate a polynomial whose coefficients are stored as logarithms, at an
// element whose successive powers have been precomputed in val_exp (as for
// polynomial_eval_lut).
//
// A stored coefficient of 0 is the sentinel for a zero coefficient, since
// log(0) does not exist.
field_element_t polynomial_eval_log_lut(field_t field, polynomial_t poly_log, const field_logarithm_t *val_exp) {
    if (val_exp[0] == 0) {
        // only the constant term matters; 0 stands for the non-existent log
        const field_logarithm_t constant_log = poly_log.coeff[0];
        return (constant_log == 0) ? 0 : field.exp[constant_log];
    }

    field_element_t acc = 0;
    for (unsigned int term = 0; term <= poly_log.order; term++) {
        const field_logarithm_t c_log = poly_log.coeff[term];
        if (c_log == 0) {
            // sentinel for a zero coefficient -- nothing to add
            continue;
        }
        // add coeff * element^term to the running total
        acc = field_add(field, acc, field_mul_log_element(field, c_log, val_exp[term]));
    }
    return acc;
}
|
||||
|
||||
// Fill val_exp[0..order] with the logarithms of val^0, val^1, ..., val^order,
// the table consumed by polynomial_eval_lut and polynomial_eval_log_lut.
// When val is 0 every entry is set to 0.
void polynomial_build_exp_lut(field_t field, field_element_t val, unsigned int order, field_logarithm_t *val_exp) {
    if (val == 0) {
        for (unsigned int k = 0; k <= order; k++) {
            val_exp[k] = 0;
        }
        return;
    }

    const field_logarithm_t step_log = field.log[val];
    field_logarithm_t power_log = field.log[1];
    for (unsigned int k = 0; k <= order; k++) {
        val_exp[k] = power_log;
        // advance to the log of the next power of val
        power_log = field_mul_log(field, power_log, step_log);
    }
}
|
||||
|
||||
// Build the polynomial (x + roots[0])(x + roots[1])...(x + roots[nroots - 1])
// into the caller-provided storage of poly, without allocating.
//
// scratch must point to two polynomials; for nroots > 0 each needs room for at
// least nroots + 1 coefficients, as they hold the intermediate products.
// poly.coeff needs the same capacity (a single coefficient when nroots is 0).
//
// Returns poly with its order set to nroots. With nroots == 0 the result is
// the constant polynomial 1 (the empty product) and neither roots nor scratch
// is accessed.
polynomial_t polynomial_init_from_roots(field_t field, unsigned int nroots, field_element_t *roots, polynomial_t poly, polynomial_t *scratch) {
    unsigned int order = nroots;

    if (nroots == 0) {
        // nothing to multiply: avoid reading roots[0] and writing coeff[1]
        poly.coeff[0] = 1;
        poly.order = 0;
        return poly;
    }

    // l is the linear factor (x + roots[i]) for the current iteration
    polynomial_t l;
    field_element_t l_coeff[2];
    l.order = 1;
    l.coeff = l_coeff;

    // we'll keep two temporary stores of rightside polynomial
    // each time through the loop, we take the previous result and use it as new rightside
    // swap back and forth (prevents the need for a copy)
    polynomial_t r[2];
    r[0] = scratch[0];
    r[1] = scratch[1];
    unsigned int rcoeffres = 0;

    // initialize the result with x + roots[0]
    r[rcoeffres].coeff[1] = 1;
    r[rcoeffres].coeff[0] = roots[0];
    r[rcoeffres].order = 1;

    // initialize lcoeff[1] with x
    // we'll fill in the 0th order term in each loop iter
    l.coeff[1] = 1;

    // loop through, using previous run's result as the new right hand side
    // this allows us to multiply one group at a time
    for (unsigned int i = 1; i < nroots; i++) {
        l.coeff[0] = roots[i];
        unsigned int nextrcoeff = rcoeffres;
        rcoeffres = (rcoeffres + 1) % 2;
        r[rcoeffres].order = i + 1;
        polynomial_mul(field, l, r[nextrcoeff], r[rcoeffres]);
    }

    memcpy(poly.coeff, r[rcoeffres].coeff, (order + 1) * sizeof(field_element_t));
    poly.order = order;

    return poly;
}
|
||||
|
||||
// Allocate and return the polynomial
// (x + roots[0])(x + roots[1])...(x + roots[nroots - 1]).
//
// The result has order nroots and owns its coefficient storage; release it
// with polynomial_destroy. With nroots == 0 the result is the constant
// polynomial 1 (the empty product) and roots is not accessed.
//
// If an allocation fails, the returned polynomial has coeff == NULL.
polynomial_t polynomial_create_from_roots(field_t field, unsigned int nroots, field_element_t *roots) {
    polynomial_t poly = polynomial_create(nroots);
    unsigned int order = nroots;

    if (nroots == 0) {
        // nothing to multiply; the buffers below would only hold one
        // coefficient, so the x + roots[0] seed must not be written
        if (poly.coeff) {
            poly.coeff[0] = 1;
        }
        return poly;
    }

    // l is the linear factor (x + roots[i]) for the current iteration
    polynomial_t l;
    field_element_t l_coeff[2] = {0, 0};
    l.order = 1;
    l.coeff = l_coeff;

    // we'll keep two temporary stores of rightside polynomial
    // each time through the loop, we take the previous result and use it as new rightside
    // swap back and forth (prevents the need for a copy)
    polynomial_t r[2];
    r[0].coeff = calloc(order + 1, sizeof(field_element_t));
    r[1].coeff = calloc(order + 1, sizeof(field_element_t));

    if (!poly.coeff || !r[0].coeff || !r[1].coeff) {
        // out of memory: release whatever was obtained and report via coeff == NULL
        free(r[0].coeff);
        free(r[1].coeff);
        free(poly.coeff);
        poly.coeff = NULL;
        return poly;
    }

    unsigned int rcoeffres = 0;

    // initialize the result with x + roots[0]
    r[rcoeffres].coeff[0] = roots[0];
    r[rcoeffres].coeff[1] = 1;
    r[rcoeffres].order = 1;

    // initialize lcoeff[1] with x
    // we'll fill in the 0th order term in each loop iter
    l.coeff[1] = 1;

    // loop through, using previous run's result as the new right hand side
    // this allows us to multiply one group at a time
    for (unsigned int i = 1; i < nroots; i++) {
        l.coeff[0] = roots[i];
        unsigned int nextrcoeff = rcoeffres;
        rcoeffres = (rcoeffres + 1) % 2;
        r[rcoeffres].order = i + 1;
        polynomial_mul(field, l, r[nextrcoeff], r[rcoeffres]);
    }

    memcpy(poly.coeff, r[rcoeffres].coeff, (order + 1) * sizeof(field_element_t));
    poly.order = order;

    free(r[0].coeff);
    free(r[1].coeff);

    return poly;
}
|
187
core/libcorrect/src/reed-solomon/reed-solomon.c
Normal file
187
core/libcorrect/src/reed-solomon/reed-solomon.c
Normal file
@ -0,0 +1,187 @@
|
||||
#include "correct/reed-solomon/reed-solomon.h"
|
||||
|
||||
// coeff must be of size nroots + 1
|
||||
// e.g. 2 roots (x + alpha)(x + alpha^2) yields a poly with 3 terms x^2 + g0*x + g1
|
||||
static polynomial_t reed_solomon_build_generator(field_t field, unsigned int nroots, field_element_t first_consecutive_root, unsigned int root_gap, polynomial_t generator, field_element_t *roots) {
|
||||
// generator has order 2*t
|
||||
// of form (x + alpha^1)(x + alpha^2)...(x - alpha^2*t)
|
||||
for (unsigned int i = 0; i < nroots; i++) {
|
||||
roots[i] = field.exp[(root_gap * (i + first_consecutive_root)) % 255];
|
||||
}
|
||||
return polynomial_create_from_roots(field, nroots, roots);
|
||||
}
|
||||
|
||||
// Allocate and initialize a Reed-Solomon encoder/decoder with a block length
// of 255 bytes.
//
// primitive_polynomial is passed to field_create to build the field tables.
// first_consecutive_root and generator_root_gap select the roots of the
// generator polynomial, and num_roots is the number of those roots, which is
// also the number of parity bytes per block (the payload is 255 - num_roots).
//
// Decoder state is not allocated here (has_init_decode starts out false).
//
// Returns NULL if num_roots exceeds the block length or if the instance or
// its root table cannot be allocated. Release a non-NULL result with
// correct_reed_solomon_destroy.
correct_reed_solomon *correct_reed_solomon_create(field_operation_t primitive_polynomial, field_logarithm_t first_consecutive_root, field_logarithm_t generator_root_gap, size_t num_roots) {
    const size_t block_length = 255;

    // more roots than block bytes would make message_length wrap around
    if (num_roots > block_length) {
        return NULL;
    }

    correct_reed_solomon *rs = calloc(1, sizeof *rs);
    if (!rs) {
        return NULL;
    }

    // grab the root table before anything that needs a dedicated destroy call,
    // so that a failure here can be unwound with free alone
    field_element_t *generator_roots = malloc(num_roots * sizeof(field_element_t));
    if (!generator_roots && num_roots > 0) {
        free(rs);
        return NULL;
    }

    rs->field = field_create(primitive_polynomial);

    rs->block_length = block_length;
    rs->min_distance = num_roots;
    rs->message_length = rs->block_length - rs->min_distance;

    rs->first_consecutive_root = first_consecutive_root;
    rs->generator_root_gap = generator_root_gap;

    rs->generator_roots = generator_roots;

    rs->generator = reed_solomon_build_generator(rs->field, rs->min_distance, rs->first_consecutive_root, rs->generator_root_gap, rs->generator, rs->generator_roots);

    // both polynomials span a whole block; encoded_remainder is also used as
    // scratch space by polynomial_mod and so must be as large as the dividend
    rs->encoded_polynomial = polynomial_create(rs->block_length - 1);
    rs->encoded_remainder = polynomial_create(rs->block_length - 1);

    rs->has_init_decode = false;

    return rs;
}
|
||||
|
||||
// Release a Reed-Solomon instance and everything it owns.
// Passing NULL is allowed and does nothing, mirroring free().
void correct_reed_solomon_destroy(correct_reed_solomon *rs) {
    if (!rs) {
        return;
    }

    // encoder-side state, always present
    field_destroy(rs->field);
    polynomial_destroy(rs->generator);
    free(rs->generator_roots);
    polynomial_destroy(rs->encoded_polynomial);
    polynomial_destroy(rs->encoded_remainder);

    // decoder-side state only exists once has_init_decode has been set
    if (rs->has_init_decode) {
        free(rs->syndromes);
        free(rs->modified_syndromes);
        polynomial_destroy(rs->received_polynomial);
        polynomial_destroy(rs->error_locator);
        polynomial_destroy(rs->error_locator_log);
        polynomial_destroy(rs->erasure_locator);
        free(rs->error_roots);
        free(rs->error_vals);
        free(rs->error_locations);
        polynomial_destroy(rs->last_error_locator);
        polynomial_destroy(rs->error_evaluator);
        polynomial_destroy(rs->error_locator_derivative);

        // one lookup row per generator root
        for (unsigned int i = 0; i < rs->min_distance; i++) {
            free(rs->generator_root_exp[i]);
        }
        free(rs->generator_root_exp);

        // one lookup row per field element
        for (field_operation_t i = 0; i < 256; i++) {
            free(rs->element_exp[i]);
        }
        free(rs->element_exp);

        polynomial_destroy(rs->init_from_roots_scratch[0]);
        polynomial_destroy(rs->init_from_roots_scratch[1]);
    }

    free(rs);
}
|
||||
|
||||
void correct_reed_solomon_debug_print(correct_reed_solomon *rs) {
|
||||
for (unsigned int i = 0; i < 256; i++) {
|
||||
printf("%3d %3d %3d %3d\n", i, rs->field.exp[i], i, rs->field.log[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("roots: ");
|
||||
for (unsigned int i = 0; i < rs->min_distance; i++) {
|
||||
printf("%d", rs->generator_roots[i]);
|
||||
if (i < rs->min_distance - 1) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("generator: ");
|
||||
for (unsigned int i = 0; i < rs->generator.order + 1; i++) {
|
||||
printf("%d*x^%d", rs->generator.coeff[i], i);
|
||||
if (i < rs->generator.order) {
|
||||
printf(" + ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("generator (alpha format): ");
|
||||
for (unsigned int i = rs->generator.order + 1; i > 0; i--) {
|
||||
printf("alpha^%d*x^%d", rs->field.log[rs->generator.coeff[i - 1]], i - 1);
|
||||
if (i > 1) {
|
||||
printf(" + ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("remainder: ");
|
||||
bool has_printed = false;
|
||||
for (unsigned int i = 0; i < rs->encoded_remainder.order + 1; i++) {
|
||||
if (!rs->encoded_remainder.coeff[i]) {
|
||||
continue;
|
||||
}
|
||||
if (has_printed) {
|
||||
printf(" + ");
|
||||
}
|
||||
has_printed = true;
|
||||
printf("%d*x^%d", rs->encoded_remainder.coeff[i], i);
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("syndromes: ");
|
||||
for (unsigned int i = 0; i < rs->min_distance; i++) {
|
||||
printf("%d", rs->syndromes[i]);
|
||||
if (i < rs->min_distance - 1) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("numerrors: %d\n\n", rs->error_locator.order);
|
||||
|
||||
printf("error locator: ");
|
||||
has_printed = false;
|
||||
for (unsigned int i = 0; i < rs->error_locator.order + 1; i++) {
|
||||
if (!rs->error_locator.coeff[i]) {
|
||||
continue;
|
||||
}
|
||||
if (has_printed) {
|
||||
printf(" + ");
|
||||
}
|
||||
has_printed = true;
|
||||
printf("%d*x^%d", rs->error_locator.coeff[i], i);
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("error roots: ");
|
||||
for (unsigned int i = 0; i < rs->error_locator.order; i++) {
|
||||
printf("%d@%d", polynomial_eval(rs->field, rs->error_locator, rs->error_roots[i]), rs->error_roots[i]);
|
||||
if (i < rs->error_locator.order - 1) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("error evaluator: ");
|
||||
has_printed = false;
|
||||
for (unsigned int i = 0; i < rs->error_evaluator.order; i++) {
|
||||
if (!rs->error_evaluator.coeff[i]) {
|
||||
continue;
|
||||
}
|
||||
if (has_printed) {
|
||||
printf(" + ");
|
||||
}
|
||||
has_printed = true;
|
||||
printf("%d*x^%d", rs->error_evaluator.coeff[i], i);
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("error locator derivative: ");
|
||||
has_printed = false;
|
||||
for (unsigned int i = 0; i < rs->error_locator_derivative.order; i++) {
|
||||
if (!rs->error_locator_derivative.coeff[i]) {
|
||||
continue;
|
||||
}
|
||||
if (has_printed) {
|
||||
printf(" + ");
|
||||
}
|
||||
has_printed = true;
|
||||
printf("%d*x^%d", rs->error_locator_derivative.coeff[i], i);
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
printf("error locator: ");
|
||||
for (unsigned int i = 0; i < rs->error_locator.order; i++) {
|
||||
printf("%d@%d", rs->error_vals[i], rs->error_locations[i]);
|
||||
if (i < rs->error_locator.order - 1) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
}
|
Reference in New Issue
Block a user