diff --git a/CMakeLists.txt b/CMakeLists.txt index e3d871c3..fbeeefb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ option(OPT_BUILD_SPYSERVER_SOURCE "Build SpyServer Source Module (no dependencie option(OPT_BUILD_SOAPY_SOURCE "Build SoapySDR Source Module (Depedencies: soapysdr)" ON) option(OPT_BUILD_AIRSPYHF_SOURCE "Build Airspy HF+ Source Module (Depedencies: libairspyhf)" ON) option(OPT_BUILD_AIRSPY_SOURCE "Build Airspy Source Module (Depedencies: libairspy)" ON) +option(OPT_BUILD_BLADERF_SOURCE "Build BladeRF Source Module (Depedencies: libbladeRF)" OFF) option(OPT_BUILD_SDRPLAY_SOURCE "Build SDRplay Source Module (Depedencies: libsdrplay)" OFF) option(OPT_BUILD_PLUTOSDR_SOURCE "Build PlutoSDR Source Module (Depedencies: libiio, libad9361)" ON) option(OPT_BUILD_HACKRF_SOURCE "Build HackRF Source Module (Depedencies: libhackrf)" OFF) @@ -20,6 +21,7 @@ add_subdirectory("core") add_subdirectory("radio") add_subdirectory("recorder") add_subdirectory("file_source") +add_subdirectory("falcon9_decoder") # Source modules if (OPT_BUILD_RTL_TCP_SOURCE) @@ -42,6 +44,10 @@ if (OPT_BUILD_AIRSPY_SOURCE) add_subdirectory("airspy_source") endif (OPT_BUILD_AIRSPY_SOURCE) +if (OPT_BUILD_BLADERF_SOURCE) +add_subdirectory("bladerf_source") +endif(OPT_BUILD_BLADERF_SOURCE) + if (OPT_BUILD_SDRPLAY_SOURCE) add_subdirectory("sdrplay_source") endif (OPT_BUILD_SDRPLAY_SOURCE) @@ -95,7 +101,7 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") add_custom_target(do_always ALL cp \"$/libsdrpp_core.dylib\" \"$\") endif () -# cmake .. "-DCMAKE_TOOLCHAIN_FILE=C:/Users/Alex/vcpkg/scripts/buildsystems/vcpkg.cmake" -G "Visual Studio 15 2017 Win64" +# cd # Install directives install(TARGETS sdrpp DESTINATION bin) diff --git a/bladerf_source/CMakeLists.txt b/bladerf_source/CMakeLists.txt new file mode 100644 index 00000000..c8b6765a --- /dev/null +++ b/bladerf_source/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.13) +project(bladerf_source) + +if (MSVC) + set(CMAKE_CXX_FLAGS "-O2 /std:c++17 /EHsc") +else() + set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -fpermissive") +endif (MSVC) + +include_directories("src/") + +file(GLOB SRC "src/*.cpp") + +add_library(bladerf_source SHARED ${SRC}) +target_link_libraries(bladerf_source PRIVATE sdrpp_core) +set_target_properties(bladerf_source PROPERTIES PREFIX "") + +if (MSVC) + # Lib path + target_link_directories(sdrpp_core PUBLIC "C:/Program Files/PothosSDR/bin/") + + target_link_libraries(bladerf_source PUBLIC bladeRF) +else (MSVC) + # Not in pkg-config + target_link_libraries(bladerf_source PUBLIC libbladeRF) +endif (MSVC) + +# Install directives +install(TARGETS bladerf_source DESTINATION lib/sdrpp/plugins) \ No newline at end of file diff --git a/bladerf_source/src/main.cpp b/bladerf_source/src/main.cpp new file mode 100644 index 00000000..49cd3c62 --- /dev/null +++ b/bladerf_source/src/main.cpp @@ -0,0 +1,339 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CONCAT(a, b) ((std::string(a) + b).c_str()) + +#define NUM_BUFFERS 128 +#define NUM_TRANSFERS 32 + +SDRPP_MOD_INFO { + /* Name: */ "bladerf_source", + /* Description: */ "BladeRF source module for SDR++", + /* Author: */ "Ryzerth", + /* Version: */ 0, 1, 0, + /* Max instances */ 1 +}; + +const uint64_t sampleRates[] = { + 520834, + 1000000, + 2000000, + 4000000, + 5000000, + 8000000, + 10000000, + 15000000, + 20000000, + 25000000, + 30000000, + 35000000, + 40000000, + 45000000, + 50000000, + 55000000, + 61440000 +}; + +const char* sampleRatesTxt = + "520.834KHz\0" + "1MHz\0" + "2MHz\0" + "4MHz\0" + "5MHz\0" + "8MHz\0" + "10MHz\0" + "15MHz\0" + "20MHz\0" + "25MHz\0" + "30MHz\0" + "35MHz\0" + "40MHz\0" + "45MHz\0" + "50MHz\0" + "55MHz\0" + "61.44MHz\0"; + +ConfigManager config; + +class BladeRFSourceModule : public ModuleManager::Instance { +public: + BladeRFSourceModule(std::string name) { + this->name = name; + + sampleRate = 768000.0; + + handler.ctx = this; + handler.selectHandler = menuSelected; + handler.deselectHandler = menuDeselected; + handler.menuHandler = menuHandler; + handler.startHandler = start; + handler.stopHandler = stop; + handler.tuneHandler = tune; + handler.stream = &stream; + + refresh(); + selectFirst(); + + // Select device here + core::setInputSampleRate(sampleRate); + + sigpath::sourceManager.registerSource("BladeRF", &handler); + } + + ~BladeRFSourceModule() { + + } + + void enable() { + enabled = true; + } + + void disable() { + enabled = false; + } + + bool isEnabled() { + return enabled; + } + + void refresh() { + devListTxt = ""; + + if (devInfoList != NULL) { + bladerf_free_device_list(devInfoList); + } + + devCount = bladerf_get_device_list(&devInfoList); + if (devCount < 0) { + spdlog::error("Could not list devices"); + return; + } + for (int i = 0; i < devCount; i++) { + devListTxt += devInfoList[i].serial; + devListTxt += '\0'; + } + } + + void selectFirst() { + if (devCount > 0) { selectByInfo(&devInfoList[0]); } + } + + void selectByInfo(bladerf_devinfo* info) { + int ret = bladerf_open_with_devinfo(&openDev, info); + if (ret != 0) { + spdlog::error("Could not open device {0}", info->serial); + return; + } + + channelCount = bladerf_get_channel_count(openDev, BLADERF_RX); + + // TODO: Gen sample rate list automatically by detecting which version is selected + + bladerf_close(openDev); + } + +private: + std::string getBandwdithScaled(double bw) { + char buf[1024]; + if (bw >= 1000000.0) { + sprintf(buf, "%.1lfMHz", bw / 1000000.0); + } + else if (bw >= 1000.0) { + sprintf(buf, "%.1lfKHz", bw / 1000.0); + } + else { + sprintf(buf, "%.1lfHz", bw); + } + return std::string(buf); + } + + static void menuSelected(void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + core::setInputSampleRate(_this->sampleRate); + spdlog::info("BladeRFSourceModule '{0}': Menu Select!", _this->name); + } + + static void menuDeselected(void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + spdlog::info("BladeRFSourceModule '{0}': Menu Deselect!", _this->name); + } + + static void start(void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + if (_this->running) { + return; + } + if (_this->devCount == 0) { return; } + + // Open device + bladerf_devinfo info = _this->devInfoList[_this->devId]; + int ret = bladerf_open_with_devinfo(&_this->openDev, &info); + if (ret != 0) { + spdlog::error("Could not open device {0}", info.serial); + + + return; + } + + bladerf_sample_rate wantedSr = _this->sampleRate; + bladerf_sample_rate actualSr; + bladerf_set_sample_rate(_this->openDev, BLADERF_CHANNEL_RX(0), wantedSr, &actualSr); + bladerf_set_frequency(_this->openDev, BLADERF_CHANNEL_RX(0), _this->freq); + + if (actualSr != wantedSr) { + spdlog::warn("Sample rate rejected: {0} vs {1}", actualSr, wantedSr); + return; + } + + // Start stream + ret = bladerf_init_stream(&_this->rxStream, _this->openDev, callback, &_this->streamBuffers, NUM_BUFFERS, BLADERF_FORMAT_SC16_Q11, 8192, NUM_TRANSFERS, _this); + if (ret != 0) { + spdlog::error("Could not start stream {0}", ret); + return; + } + + bladerf_enable_module(_this->openDev, BLADERF_CHANNEL_RX(0), true); + + _this->running = true; + _this->workerThread = std::thread(&BladeRFSourceModule::worker, _this); + + spdlog::info("BladeRFSourceModule '{0}': Start!", _this->name); + } + + static void stop(void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + if (!_this->running) { + return; + } + _this->running = false; + _this->stream.stopWriter(); + + bladerf_enable_module(_this->openDev, BLADERF_CHANNEL_RX(0), false); + if (_this->workerThread.joinable()) { + _this->workerThread.join(); + } + + bladerf_close(_this->openDev); + + _this->stream.clearWriteStop(); + spdlog::info("BladeRFSourceModule '{0}': Stop!", _this->name); + } + + static void tune(double freq, void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + _this->freq = freq; + if (_this->running) { + bladerf_set_frequency(_this->openDev, BLADERF_CHANNEL_RX(0), _this->freq); + } + spdlog::info("BladeRFSourceModule '{0}': Tune: {1}!", _this->name, freq); + } + + static void menuHandler(void* ctx) { + BladeRFSourceModule* _this = (BladeRFSourceModule*)ctx; + float menuWidth = ImGui::GetContentRegionAvailWidth(); + + if (_this->running) { style::beginDisabled(); } + + ImGui::SetNextItemWidth(menuWidth); + if (ImGui::Combo(CONCAT("##_airspyhf_dev_sel_", _this->name), &_this->devId, _this->devListTxt.c_str())) { + // Select device + core::setInputSampleRate(_this->sampleRate); + // Save config + } + + if (ImGui::Combo(CONCAT("##_airspyhf_sr_sel_", _this->name), &_this->srId, sampleRatesTxt)) { + _this->sampleRate = sampleRates[_this->srId]; + core::setInputSampleRate(_this->sampleRate); + // Save config + } + + ImGui::SameLine(); + float refreshBtnWdith = menuWidth - ImGui::GetCursorPosX(); + if (ImGui::Button(CONCAT("Refresh##_airspyhf_refr_", _this->name), ImVec2(refreshBtnWdith, 0))) { + _this->refresh(); + config.aquire(); + std::string devSerial = config.conf["device"]; + config.release(); + // Reselect device + core::setInputSampleRate(_this->sampleRate); + } + + if (_this->running) { style::endDisabled(); } + + // General config BS + + } + + void worker() { + bladerf_stream(rxStream, BLADERF_RX_X1); + } + + static void* callback(struct bladerf *dev, struct bladerf_stream *stream, struct bladerf_metadata *meta, void *samples, size_t num_samples, void *user_data) { + // TODO: Convert with volk + BladeRFSourceModule* _this = (BladeRFSourceModule*)user_data; + int16_t* samples16 = (int16_t*)samples; + _this->currentBuffer = ((_this->currentBuffer + 1) % NUM_BUFFERS); + for (size_t i = 0; i < num_samples; i++) { + _this->stream.writeBuf[i].i = (float)samples16[(2 * i)] / 32768.0f; + _this->stream.writeBuf[i].q = (float)samples16[(2 * i) + 1] / 32768.0f; + if (!_this->stream.swap(num_samples)) { return _this->streamBuffers[_this->currentBuffer];; } + } + + return _this->streamBuffers[_this->currentBuffer]; + } + + std::string name; + bladerf* openDev; + bool enabled = true; + dsp::stream stream; + //dsp::Packer packer(&steam, 2048); + double sampleRate; + SourceManager::SourceHandler handler; + bool running = false; + double freq; + int devId = 0; + int srId = 0; + + int channelCount = 0; + int currentBuffer = 0; + void** streamBuffers; + struct bladerf_stream* rxStream; + + std::thread workerThread; + + int devCount = 0; + bladerf_devinfo* devInfoList = NULL; + std::string devListTxt; +}; + +MOD_EXPORT void _INIT_() { + json def = json({}); + def["devices"] = json({}); + def["device"] = ""; + config.setPath(options::opts.root + "/bladerf_config.json"); + config.load(def); + config.enableAutoSave(); +} + +MOD_EXPORT ModuleManager::Instance* _CREATE_INSTANCE_(std::string name) { + return new BladeRFSourceModule(name); +} + +MOD_EXPORT void _DELETE_INSTANCE_(ModuleManager::Instance* instance) { + delete (BladeRFSourceModule*)instance; +} + +MOD_EXPORT void _END_() { + config.disableAutoSave(); + config.save(); +} \ No newline at end of file diff --git a/core/src/dsp/deframing.h b/core/src/dsp/deframing.h new file mode 100644 index 00000000..642d77a1 --- /dev/null +++ b/core/src/dsp/deframing.h @@ -0,0 +1,114 @@ +#pragma once +#include +#include + +#define DSP_SIGN(n) ((n) >= 0) +#define DSP_STEP(n) (((n) > 0.0f) ? 1.0f : -1.0f) + +namespace dsp { + class Deframer : public generic_block { + public: + Deframer() {} + + Deframer(stream* in, int frameLen, uint8_t* syncWord, int syncLen) { init(in, frameLen, syncWord, syncLen); } + + ~Deframer() { + generic_block::stop(); + } + + void init(stream* in, int frameLen, uint8_t* syncWord, int syncLen) { + _in = in; + _frameLen = frameLen; + _syncword = new uint8_t[syncLen]; + _syncLen = syncLen; + memcpy(_syncword, syncWord, syncLen); + + buffer = new uint8_t[STREAM_BUFFER_SIZE + syncLen]; + memset(buffer, 0, syncLen); + bufferStart = buffer + syncLen; + + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + + // Copy data into work buffer + memcpy(bufferStart, _in->readBuf, count - 1); + + // Iterate through all symbols + for (int i = 0; i < count;) { + + // If already in the process of reading bits + if (bitsRead >= 0) { + if ((bitsRead % 8) == 0) { out.writeBuf[bitsRead / 8] = 0; } + out.writeBuf[bitsRead / 8] |= (buffer[i] << (7 - (bitsRead % 8))); + i++; + bitsRead++; + + if (bitsRead >= _frameLen) { + if (!out.swap((bitsRead / 8) + ((bitsRead % 8) > 0))) { return -1; } + bitsRead = -1; + nextBitIsStartOfFrame = true; + } + + continue; + } + + // Else, check for a header + else if (memcmp(buffer + i, _syncword, _syncLen) == 0) { + bitsRead = 0; + badFrameCount = 0; + continue; + } + else if (nextBitIsStartOfFrame) { + nextBitIsStartOfFrame = false; + + // try to save + if (badFrameCount < 5) { + badFrameCount++; + bitsRead = 0; + continue; + } + + } + + else { i++; } + + nextBitIsStartOfFrame = false; + + } + + // Keep last _syncLen4 symbols + memcpy(buffer, &_in->readBuf[count - _syncLen], _syncLen); + + //printf("Block processed\n"); + callcount++; + + _in->flush(); + return count; + } + + stream out; + + private: + uint8_t* buffer; + uint8_t* bufferStart; + uint8_t* _syncword; + int count; + int _frameLen; + int _syncLen; + int bitsRead = -1; + + int badFrameCount = 5; + bool nextBitIsStartOfFrame = false; + + int callcount = 0; + + stream* _in; + + }; +} \ No newline at end of file diff --git a/core/src/dsp/demodulator.h b/core/src/dsp/demodulator.h index fc51a47a..91dc3cde 100644 --- a/core/src/dsp/demodulator.h +++ b/core/src/dsp/demodulator.h @@ -3,7 +3,7 @@ #include #include #include - +#include #include #define FAST_ATAN2_COEF1 FL_M_PI / 4.0f diff --git a/core/src/dsp/interpolation_taps.h b/core/src/dsp/interpolation_taps.h new file mode 100644 index 00000000..5550846c --- /dev/null +++ b/core/src/dsp/interpolation_taps.h @@ -0,0 +1,136 @@ +#pragma once + +const int INTERP_TAP_COUNT = 8; +const int INTERP_STEPS = 128; + +const float INTERP_TAPS[INTERP_STEPS + 1][INTERP_TAP_COUNT] = { + { 0.00000e+00, 0.00000e+00, 0.00000e+00, 1.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00 }, + { -1.98993e-04, 1.24642e-03, -5.41054e-03, 9.98534e-01, 7.89295e-03, -2.76968e-03, 8.53777e-04, -1.54700e-04 }, + { -3.96391e-04, 2.47942e-03, -1.07209e-02, 9.96891e-01, 1.58840e-02, -5.55134e-03, 1.70888e-03, -3.09412e-04 }, + { -5.92100e-04, 3.69852e-03, -1.59305e-02, 9.95074e-01, 2.39714e-02, -8.34364e-03, 2.56486e-03, -4.64053e-04 }, + { -7.86031e-04, 4.90322e-03, -2.10389e-02, 9.93082e-01, 3.21531e-02, -1.11453e-02, 3.42130e-03, -6.18544e-04 }, + { -9.78093e-04, 6.09305e-03, -2.60456e-02, 9.90917e-01, 4.04274e-02, -1.39548e-02, 4.27773e-03, -7.72802e-04 }, + { -1.16820e-03, 7.26755e-03, -3.09503e-02, 9.88580e-01, 4.87921e-02, -1.67710e-02, 5.13372e-03, -9.26747e-04 }, + { -1.35627e-03, 8.42626e-03, -3.57525e-02, 9.86071e-01, 5.72454e-02, -1.95925e-02, 5.98883e-03, -1.08030e-03 }, + { -1.54221e-03, 9.56876e-03, -4.04519e-02, 9.83392e-01, 6.57852e-02, -2.24178e-02, 6.84261e-03, -1.23337e-03 }, + { -1.72594e-03, 1.06946e-02, -4.50483e-02, 9.80543e-01, 7.44095e-02, -2.52457e-02, 7.69462e-03, -1.38589e-03 }, + { -1.90738e-03, 1.18034e-02, -4.95412e-02, 9.77526e-01, 8.31162e-02, -2.80746e-02, 8.54441e-03, -1.53777e-03 }, + { -2.08645e-03, 1.28947e-02, -5.39305e-02, 9.74342e-01, 9.19033e-02, -3.09033e-02, 9.39154e-03, -1.68894e-03 }, + { -2.26307e-03, 1.39681e-02, -5.82159e-02, 9.70992e-01, 1.00769e-01, -3.37303e-02, 1.02356e-02, -1.83931e-03 }, + { -2.43718e-03, 1.50233e-02, -6.23972e-02, 9.67477e-01, 1.09710e-01, -3.65541e-02, 1.10760e-02, -1.98880e-03 }, + { -2.60868e-03, 1.60599e-02, -6.64743e-02, 9.63798e-01, 1.18725e-01, -3.93735e-02, 1.19125e-02, -2.13733e-03 }, + { -2.77751e-03, 1.70776e-02, -7.04471e-02, 9.59958e-01, 1.27812e-01, -4.21869e-02, 1.27445e-02, -2.28483e-03 }, + { -2.94361e-03, 1.80759e-02, -7.43154e-02, 9.55956e-01, 1.36968e-01, -4.49929e-02, 1.35716e-02, -2.43121e-03 }, + { -3.10689e-03, 1.90545e-02, -7.80792e-02, 9.51795e-01, 1.46192e-01, -4.77900e-02, 1.43934e-02, -2.57640e-03 }, + { -3.26730e-03, 2.00132e-02, -8.17385e-02, 9.47477e-01, 1.55480e-01, -5.05770e-02, 1.52095e-02, -2.72032e-03 }, + { -3.42477e-03, 2.09516e-02, -8.52933e-02, 9.43001e-01, 1.64831e-01, -5.33522e-02, 1.60193e-02, -2.86289e-03 }, + { -3.57923e-03, 2.18695e-02, -8.87435e-02, 9.38371e-01, 1.74242e-01, -5.61142e-02, 1.68225e-02, -3.00403e-03 }, + { -3.73062e-03, 2.27664e-02, -9.20893e-02, 9.33586e-01, 1.83711e-01, -5.88617e-02, 1.76185e-02, -3.14367e-03 }, + { -3.87888e-03, 2.36423e-02, -9.53307e-02, 9.28650e-01, 1.93236e-01, -6.15931e-02, 1.84071e-02, -3.28174e-03 }, + { -4.02397e-03, 2.44967e-02, -9.84679e-02, 9.23564e-01, 2.02814e-01, -6.43069e-02, 1.91877e-02, -3.41815e-03 }, + { -4.16581e-03, 2.53295e-02, -1.01501e-01, 9.18329e-01, 2.12443e-01, -6.70018e-02, 1.99599e-02, -3.55283e-03 }, + { -4.30435e-03, 2.61404e-02, -1.04430e-01, 9.12947e-01, 2.22120e-01, -6.96762e-02, 2.07233e-02, -3.68570e-03 }, + { -4.43955e-03, 2.69293e-02, -1.07256e-01, 9.07420e-01, 2.31843e-01, -7.23286e-02, 2.14774e-02, -3.81671e-03 }, + { -4.57135e-03, 2.76957e-02, -1.09978e-01, 9.01749e-01, 2.41609e-01, -7.49577e-02, 2.22218e-02, -3.94576e-03 }, + { -4.69970e-03, 2.84397e-02, -1.12597e-01, 8.95936e-01, 2.51417e-01, -7.75620e-02, 2.29562e-02, -4.07279e-03 }, + { -4.82456e-03, 2.91609e-02, -1.15113e-01, 8.89984e-01, 2.61263e-01, -8.01399e-02, 2.36801e-02, -4.19774e-03 }, + { -4.94589e-03, 2.98593e-02, -1.17526e-01, 8.83893e-01, 2.71144e-01, -8.26900e-02, 2.43930e-02, -4.32052e-03 }, + { -5.06363e-03, 3.05345e-02, -1.19837e-01, 8.77666e-01, 2.81060e-01, -8.52109e-02, 2.50946e-02, -4.44107e-03 }, + { -5.17776e-03, 3.11866e-02, -1.22047e-01, 8.71305e-01, 2.91006e-01, -8.77011e-02, 2.57844e-02, -4.55932e-03 }, + { -5.28823e-03, 3.18153e-02, -1.24154e-01, 8.64812e-01, 3.00980e-01, -9.01591e-02, 2.64621e-02, -4.67520e-03 }, + { -5.39500e-03, 3.24205e-02, -1.26161e-01, 8.58189e-01, 3.10980e-01, -9.25834e-02, 2.71272e-02, -4.78866e-03 }, + { -5.49804e-03, 3.30021e-02, -1.28068e-01, 8.51437e-01, 3.21004e-01, -9.49727e-02, 2.77794e-02, -4.89961e-03 }, + { -5.59731e-03, 3.35600e-02, -1.29874e-01, 8.44559e-01, 3.31048e-01, -9.73254e-02, 2.84182e-02, -5.00800e-03 }, + { -5.69280e-03, 3.40940e-02, -1.31581e-01, 8.37557e-01, 3.41109e-01, -9.96402e-02, 2.90433e-02, -5.11376e-03 }, + { -5.78446e-03, 3.46042e-02, -1.33189e-01, 8.30432e-01, 3.51186e-01, -1.01915e-01, 2.96543e-02, -5.21683e-03 }, + { -5.87227e-03, 3.50903e-02, -1.34699e-01, 8.23188e-01, 3.61276e-01, -1.04150e-01, 3.02507e-02, -5.31716e-03 }, + { -5.95620e-03, 3.55525e-02, -1.36111e-01, 8.15826e-01, 3.71376e-01, -1.06342e-01, 3.08323e-02, -5.41467e-03 }, + { -6.03624e-03, 3.59905e-02, -1.37426e-01, 8.08348e-01, 3.81484e-01, -1.08490e-01, 3.13987e-02, -5.50931e-03 }, + { -6.11236e-03, 3.64044e-02, -1.38644e-01, 8.00757e-01, 3.91596e-01, -1.10593e-01, 3.19495e-02, -5.60103e-03 }, + { -6.18454e-03, 3.67941e-02, -1.39767e-01, 7.93055e-01, 4.01710e-01, -1.12650e-01, 3.24843e-02, -5.68976e-03 }, + { -6.25277e-03, 3.71596e-02, -1.40794e-01, 7.85244e-01, 4.11823e-01, -1.14659e-01, 3.30027e-02, -5.77544e-03 }, + { -6.31703e-03, 3.75010e-02, -1.41727e-01, 7.77327e-01, 4.21934e-01, -1.16618e-01, 3.35046e-02, -5.85804e-03 }, + { -6.37730e-03, 3.78182e-02, -1.42566e-01, 7.69305e-01, 4.32038e-01, -1.18526e-01, 3.39894e-02, -5.93749e-03 }, + { -6.43358e-03, 3.81111e-02, -1.43313e-01, 7.61181e-01, 4.42134e-01, -1.20382e-01, 3.44568e-02, -6.01374e-03 }, + { -6.48585e-03, 3.83800e-02, -1.43968e-01, 7.52958e-01, 4.52218e-01, -1.22185e-01, 3.49066e-02, -6.08674e-03 }, + { -6.53412e-03, 3.86247e-02, -1.44531e-01, 7.44637e-01, 4.62289e-01, -1.23933e-01, 3.53384e-02, -6.15644e-03 }, + { -6.57836e-03, 3.88454e-02, -1.45004e-01, 7.36222e-01, 4.72342e-01, -1.25624e-01, 3.57519e-02, -6.22280e-03 }, + { -6.61859e-03, 3.90420e-02, -1.45387e-01, 7.27714e-01, 4.82377e-01, -1.27258e-01, 3.61468e-02, -6.28577e-03 }, + { -6.65479e-03, 3.92147e-02, -1.45682e-01, 7.19116e-01, 4.92389e-01, -1.28832e-01, 3.65227e-02, -6.34530e-03 }, + { -6.68698e-03, 3.93636e-02, -1.45889e-01, 7.10431e-01, 5.02377e-01, -1.30347e-01, 3.68795e-02, -6.40135e-03 }, + { -6.71514e-03, 3.94886e-02, -1.46009e-01, 7.01661e-01, 5.12337e-01, -1.31800e-01, 3.72167e-02, -6.45388e-03 }, + { -6.73929e-03, 3.95900e-02, -1.46043e-01, 6.92808e-01, 5.22267e-01, -1.33190e-01, 3.75341e-02, -6.50285e-03 }, + { -6.75943e-03, 3.96678e-02, -1.45993e-01, 6.83875e-01, 5.32164e-01, -1.34515e-01, 3.78315e-02, -6.54823e-03 }, + { -6.77557e-03, 3.97222e-02, -1.45859e-01, 6.74865e-01, 5.42025e-01, -1.35775e-01, 3.81085e-02, -6.58996e-03 }, + { -6.78771e-03, 3.97532e-02, -1.45641e-01, 6.65779e-01, 5.51849e-01, -1.36969e-01, 3.83650e-02, -6.62802e-03 }, + { -6.79588e-03, 3.97610e-02, -1.45343e-01, 6.56621e-01, 5.61631e-01, -1.38094e-01, 3.86006e-02, -6.66238e-03 }, + { -6.80007e-03, 3.97458e-02, -1.44963e-01, 6.47394e-01, 5.71370e-01, -1.39150e-01, 3.88151e-02, -6.69300e-03 }, + { -6.80032e-03, 3.97077e-02, -1.44503e-01, 6.38099e-01, 5.81063e-01, -1.40136e-01, 3.90083e-02, -6.71985e-03 }, + { -6.79662e-03, 3.96469e-02, -1.43965e-01, 6.28739e-01, 5.90706e-01, -1.41050e-01, 3.91800e-02, -6.74291e-03 }, + { -6.78902e-03, 3.95635e-02, -1.43350e-01, 6.19318e-01, 6.00298e-01, -1.41891e-01, 3.93299e-02, -6.76214e-03 }, + { -6.77751e-03, 3.94578e-02, -1.42658e-01, 6.09836e-01, 6.09836e-01, -1.42658e-01, 3.94578e-02, -6.77751e-03 }, + { -6.76214e-03, 3.93299e-02, -1.41891e-01, 6.00298e-01, 6.19318e-01, -1.43350e-01, 3.95635e-02, -6.78902e-03 }, + { -6.74291e-03, 3.91800e-02, -1.41050e-01, 5.90706e-01, 6.28739e-01, -1.43965e-01, 3.96469e-02, -6.79662e-03 }, + { -6.71985e-03, 3.90083e-02, -1.40136e-01, 5.81063e-01, 6.38099e-01, -1.44503e-01, 3.97077e-02, -6.80032e-03 }, + { -6.69300e-03, 3.88151e-02, -1.39150e-01, 5.71370e-01, 6.47394e-01, -1.44963e-01, 3.97458e-02, -6.80007e-03 }, + { -6.66238e-03, 3.86006e-02, -1.38094e-01, 5.61631e-01, 6.56621e-01, -1.45343e-01, 3.97610e-02, -6.79588e-03 }, + { -6.62802e-03, 3.83650e-02, -1.36969e-01, 5.51849e-01, 6.65779e-01, -1.45641e-01, 3.97532e-02, -6.78771e-03 }, + { -6.58996e-03, 3.81085e-02, -1.35775e-01, 5.42025e-01, 6.74865e-01, -1.45859e-01, 3.97222e-02, -6.77557e-03 }, + { -6.54823e-03, 3.78315e-02, -1.34515e-01, 5.32164e-01, 6.83875e-01, -1.45993e-01, 3.96678e-02, -6.75943e-03 }, + { -6.50285e-03, 3.75341e-02, -1.33190e-01, 5.22267e-01, 6.92808e-01, -1.46043e-01, 3.95900e-02, -6.73929e-03 }, + { -6.45388e-03, 3.72167e-02, -1.31800e-01, 5.12337e-01, 7.01661e-01, -1.46009e-01, 3.94886e-02, -6.71514e-03 }, + { -6.40135e-03, 3.68795e-02, -1.30347e-01, 5.02377e-01, 7.10431e-01, -1.45889e-01, 3.93636e-02, -6.68698e-03 }, + { -6.34530e-03, 3.65227e-02, -1.28832e-01, 4.92389e-01, 7.19116e-01, -1.45682e-01, 3.92147e-02, -6.65479e-03 }, + { -6.28577e-03, 3.61468e-02, -1.27258e-01, 4.82377e-01, 7.27714e-01, -1.45387e-01, 3.90420e-02, -6.61859e-03 }, + { -6.22280e-03, 3.57519e-02, -1.25624e-01, 4.72342e-01, 7.36222e-01, -1.45004e-01, 3.88454e-02, -6.57836e-03 }, + { -6.15644e-03, 3.53384e-02, -1.23933e-01, 4.62289e-01, 7.44637e-01, -1.44531e-01, 3.86247e-02, -6.53412e-03 }, + { -6.08674e-03, 3.49066e-02, -1.22185e-01, 4.52218e-01, 7.52958e-01, -1.43968e-01, 3.83800e-02, -6.48585e-03 }, + { -6.01374e-03, 3.44568e-02, -1.20382e-01, 4.42134e-01, 7.61181e-01, -1.43313e-01, 3.81111e-02, -6.43358e-03 }, + { -5.93749e-03, 3.39894e-02, -1.18526e-01, 4.32038e-01, 7.69305e-01, -1.42566e-01, 3.78182e-02, -6.37730e-03 }, + { -5.85804e-03, 3.35046e-02, -1.16618e-01, 4.21934e-01, 7.77327e-01, -1.41727e-01, 3.75010e-02, -6.31703e-03 }, + { -5.77544e-03, 3.30027e-02, -1.14659e-01, 4.11823e-01, 7.85244e-01, -1.40794e-01, 3.71596e-02, -6.25277e-03 }, + { -5.68976e-03, 3.24843e-02, -1.12650e-01, 4.01710e-01, 7.93055e-01, -1.39767e-01, 3.67941e-02, -6.18454e-03 }, + { -5.60103e-03, 3.19495e-02, -1.10593e-01, 3.91596e-01, 8.00757e-01, -1.38644e-01, 3.64044e-02, -6.11236e-03 }, + { -5.50931e-03, 3.13987e-02, -1.08490e-01, 3.81484e-01, 8.08348e-01, -1.37426e-01, 3.59905e-02, -6.03624e-03 }, + { -5.41467e-03, 3.08323e-02, -1.06342e-01, 3.71376e-01, 8.15826e-01, -1.36111e-01, 3.55525e-02, -5.95620e-03 }, + { -5.31716e-03, 3.02507e-02, -1.04150e-01, 3.61276e-01, 8.23188e-01, -1.34699e-01, 3.50903e-02, -5.87227e-03 }, + { -5.21683e-03, 2.96543e-02, -1.01915e-01, 3.51186e-01, 8.30432e-01, -1.33189e-01, 3.46042e-02, -5.78446e-03 }, + { -5.11376e-03, 2.90433e-02, -9.96402e-02, 3.41109e-01, 8.37557e-01, -1.31581e-01, 3.40940e-02, -5.69280e-03 }, + { -5.00800e-03, 2.84182e-02, -9.73254e-02, 3.31048e-01, 8.44559e-01, -1.29874e-01, 3.35600e-02, -5.59731e-03 }, + { -4.89961e-03, 2.77794e-02, -9.49727e-02, 3.21004e-01, 8.51437e-01, -1.28068e-01, 3.30021e-02, -5.49804e-03 }, + { -4.78866e-03, 2.71272e-02, -9.25834e-02, 3.10980e-01, 8.58189e-01, -1.26161e-01, 3.24205e-02, -5.39500e-03 }, + { -4.67520e-03, 2.64621e-02, -9.01591e-02, 3.00980e-01, 8.64812e-01, -1.24154e-01, 3.18153e-02, -5.28823e-03 }, + { -4.55932e-03, 2.57844e-02, -8.77011e-02, 2.91006e-01, 8.71305e-01, -1.22047e-01, 3.11866e-02, -5.17776e-03 }, + { -4.44107e-03, 2.50946e-02, -8.52109e-02, 2.81060e-01, 8.77666e-01, -1.19837e-01, 3.05345e-02, -5.06363e-03 }, + { -4.32052e-03, 2.43930e-02, -8.26900e-02, 2.71144e-01, 8.83893e-01, -1.17526e-01, 2.98593e-02, -4.94589e-03 }, + { -4.19774e-03, 2.36801e-02, -8.01399e-02, 2.61263e-01, 8.89984e-01, -1.15113e-01, 2.91609e-02, -4.82456e-03 }, + { -4.07279e-03, 2.29562e-02, -7.75620e-02, 2.51417e-01, 8.95936e-01, -1.12597e-01, 2.84397e-02, -4.69970e-03 }, + { -3.94576e-03, 2.22218e-02, -7.49577e-02, 2.41609e-01, 9.01749e-01, -1.09978e-01, 2.76957e-02, -4.57135e-03 }, + { -3.81671e-03, 2.14774e-02, -7.23286e-02, 2.31843e-01, 9.07420e-01, -1.07256e-01, 2.69293e-02, -4.43955e-03 }, + { -3.68570e-03, 2.07233e-02, -6.96762e-02, 2.22120e-01, 9.12947e-01, -1.04430e-01, 2.61404e-02, -4.30435e-03 }, + { -3.55283e-03, 1.99599e-02, -6.70018e-02, 2.12443e-01, 9.18329e-01, -1.01501e-01, 2.53295e-02, -4.16581e-03 }, + { -3.41815e-03, 1.91877e-02, -6.43069e-02, 2.02814e-01, 9.23564e-01, -9.84679e-02, 2.44967e-02, -4.02397e-03 }, + { -3.28174e-03, 1.84071e-02, -6.15931e-02, 1.93236e-01, 9.28650e-01, -9.53307e-02, 2.36423e-02, -3.87888e-03 }, + { -3.14367e-03, 1.76185e-02, -5.88617e-02, 1.83711e-01, 9.33586e-01, -9.20893e-02, 2.27664e-02, -3.73062e-03 }, + { -3.00403e-03, 1.68225e-02, -5.61142e-02, 1.74242e-01, 9.38371e-01, -8.87435e-02, 2.18695e-02, -3.57923e-03 }, + { -2.86289e-03, 1.60193e-02, -5.33522e-02, 1.64831e-01, 9.43001e-01, -8.52933e-02, 2.09516e-02, -3.42477e-03 }, + { -2.72032e-03, 1.52095e-02, -5.05770e-02, 1.55480e-01, 9.47477e-01, -8.17385e-02, 2.00132e-02, -3.26730e-03 }, + { -2.57640e-03, 1.43934e-02, -4.77900e-02, 1.46192e-01, 9.51795e-01, -7.80792e-02, 1.90545e-02, -3.10689e-03 }, + { -2.43121e-03, 1.35716e-02, -4.49929e-02, 1.36968e-01, 9.55956e-01, -7.43154e-02, 1.80759e-02, -2.94361e-03 }, + { -2.28483e-03, 1.27445e-02, -4.21869e-02, 1.27812e-01, 9.59958e-01, -7.04471e-02, 1.70776e-02, -2.77751e-03 }, + { -2.13733e-03, 1.19125e-02, -3.93735e-02, 1.18725e-01, 9.63798e-01, -6.64743e-02, 1.60599e-02, -2.60868e-03 }, + { -1.98880e-03, 1.10760e-02, -3.65541e-02, 1.09710e-01, 9.67477e-01, -6.23972e-02, 1.50233e-02, -2.43718e-03 }, + { -1.83931e-03, 1.02356e-02, -3.37303e-02, 1.00769e-01, 9.70992e-01, -5.82159e-02, 1.39681e-02, -2.26307e-03 }, + { -1.68894e-03, 9.39154e-03, -3.09033e-02, 9.19033e-02, 9.74342e-01, -5.39305e-02, 1.28947e-02, -2.08645e-03 }, + { -1.53777e-03, 8.54441e-03, -2.80746e-02, 8.31162e-02, 9.77526e-01, -4.95412e-02, 1.18034e-02, -1.90738e-03 }, + { -1.38589e-03, 7.69462e-03, -2.52457e-02, 7.44095e-02, 9.80543e-01, -4.50483e-02, 1.06946e-02, -1.72594e-03 }, + { -1.23337e-03, 6.84261e-03, -2.24178e-02, 6.57852e-02, 9.83392e-01, -4.04519e-02, 9.56876e-03, -1.54221e-03 }, + { -1.08030e-03, 5.98883e-03, -1.95925e-02, 5.72454e-02, 9.86071e-01, -3.57525e-02, 8.42626e-03, -1.35627e-03 }, + { -9.26747e-04, 5.13372e-03, -1.67710e-02, 4.87921e-02, 9.88580e-01, -3.09503e-02, 7.26755e-03, -1.16820e-03 }, + { -7.72802e-04, 4.27773e-03, -1.39548e-02, 4.04274e-02, 9.90917e-01, -2.60456e-02, 6.09305e-03, -9.78093e-04 }, + { -6.18544e-04, 3.42130e-03, -1.11453e-02, 3.21531e-02, 9.93082e-01, -2.10389e-02, 4.90322e-03, -7.86031e-04 }, + { -4.64053e-04, 2.56486e-03, -8.34364e-03, 2.39714e-02, 9.95074e-01, -1.59305e-02, 3.69852e-03, -5.92100e-04 }, + { -3.09412e-04, 1.70888e-03, -5.55134e-03, 1.58840e-02, 9.96891e-01, -1.07209e-02, 2.47942e-03, -3.96391e-04 }, + { -1.54700e-04, 8.53777e-04, -2.76968e-03, 7.89295e-03, 9.98534e-01, -5.41054e-03, 1.24642e-03, -1.98993e-04 }, + { 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 1.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00 }, +}; diff --git a/core/src/dsp/pll.h b/core/src/dsp/pll.h new file mode 100644 index 00000000..7d0c44ad --- /dev/null +++ b/core/src/dsp/pll.h @@ -0,0 +1,185 @@ +#pragma once +#include +#include + +#define DSP_SIGN(n) ((n) >= 0) +#define DSP_STEP(n) (((n) > 0.0f) ? 1.0f : -1.0f) + +namespace dsp { + class SqSymbolRecovery : public generic_block { + public: + SqSymbolRecovery() {} + + SqSymbolRecovery(stream* in, int omega) { init(in, omega); } + + ~SqSymbolRecovery() { + generic_block::stop(); + } + + void init(stream* in, int omega) { + _in = in; + samplesPerSymbol = omega; + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + int outCount = 0; + + for (int i = 0; i < count; i++) { + if (DSP_SIGN(lastVal) != DSP_SIGN(_in->readBuf[i])) { + counter = samplesPerSymbol / 2; + lastVal = _in->readBuf[i]; + continue; + } + + if (counter >= samplesPerSymbol) { + counter = 0; + out.writeBuf[outCount] = _in->readBuf[i]; + outCount++; + } + else { + counter++; + } + + lastVal = _in->readBuf[i]; + } + + _in->flush(); + if (!out.swap(outCount)) { return -1; } + return count; + } + + stream out; + + private: + int count; + int samplesPerSymbol = 1; + int counter = 0; + float lastVal = 0; + stream* _in; + + }; + + + + class MMClockRecovery : public generic_block { + public: + MMClockRecovery() {} + + MMClockRecovery(stream* in, float omega, float gainOmega, float muGain, float omegaRelLimit) { + init(in, omega, gainOmega, muGain, omegaRelLimit); + } + + ~MMClockRecovery() { + generic_block::stop(); + } + + void init(stream* in, float omega, float gainOmega, float muGain, float omegaRelLimit) { + _in = in; + _omega = omega; + _muGain = muGain; + _gainOmega = gainOmega; + _omegaRelLimit = omegaRelLimit; + + omegaMin = _omega - (_omega * _omegaRelLimit); + omegaMax = _omega + (_omega * _omegaRelLimit); + _dynOmega = _omega; + + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + int outCount = 0; + float outVal; + float phaseError; + float roundedStep; + int maxOut = 2.0f * _omega * (float)count; + + // Copy the first 7 values to the delay buffer for fast computing + memcpy(&delay[7], _in->readBuf, 7); + + int i = nextOffset; + for (; i < count && outCount < maxOut;) { + // Calculate output value + // If we still need to use the old values, calculate using delay buf + // Otherwise, use normal buffer + if (i < 7) { + volk_32f_x2_dot_prod_32f(&outVal, &delay[i], INTERP_TAPS[(int)roundf(_mu * 128.0f)], 8); + } + else { + volk_32f_x2_dot_prod_32f(&outVal, &_in->readBuf[i - 7], INTERP_TAPS[(int)roundf(_mu * 128.0f)], 8); + } + out.writeBuf[outCount] = outVal; + + + // Cursed phase detect approximation (don't ask me how this approximation works) + phaseError = (DSP_STEP(lastOutput)*outVal) - (lastOutput*DSP_STEP(outVal)); + lastOutput = outVal; + outCount++; + + // Adjust the symbol rate using the phase error approximation and clamp + // TODO: Branchless clamp + _dynOmega = _dynOmega + (_gainOmega * phaseError); + if (_dynOmega > omegaMax) { _dynOmega = omegaMax; } + else if (_dynOmega < omegaMin) { _dynOmega = omegaMin; } + + // Adjust the symbol phase according to the phase error approximation + // It will now contain the phase delta needed to jump to the next symbol + // Rounded step will contain the rounded number of symbols + _mu = _mu + _dynOmega + (_muGain * phaseError); + roundedStep = floor(_mu); + + // Step to where the next symbol should be + i += (int)roundedStep; + + // Now that we've stepped to the next symbol, keep only the offset inside the symbol + _mu -= roundedStep; + } + + nextOffset = i - count; + + // Save the last 7 values for the next round + memcpy(delay, &_in->readBuf[count - 7], 7); + + _in->flush(); + if (!out.swap(outCount)) { return -1; } + return count; + } + + stream out; + + private: + int count; + + // Delay buffer + float delay[15]; + int nextOffset = 0; + + // Configuration + float _omega = 1.0f; + float _muGain = 1.0f; + float _gainOmega = 0.001f; + float _omegaRelLimit = 0.005; + + // Precalculated values + float omegaMin = _omega + (_omega * _omegaRelLimit); + float omegaMax = _omega + (_omega * _omegaRelLimit); + + // Runtime adjusted + float _dynOmega = _omega; + float _mu = 0.5f; + float lastOutput = 0.0f; + + + stream* _in; + + }; +} \ No newline at end of file diff --git a/core/src/dsp/processing.h b/core/src/dsp/processing.h index a775ceec..1dccbce4 100644 --- a/core/src/dsp/processing.h +++ b/core/src/dsp/processing.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace dsp { template @@ -151,6 +152,54 @@ namespace dsp { }; + class FeedForwardAGC : public generic_block { + public: + FeedForwardAGC() {} + + FeedForwardAGC(stream* in) { init(in); } + + ~FeedForwardAGC() { generic_block::stop(); } + + void init(stream* in) { + _in = in; + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + void setInput(stream* in) { + std::lock_guard lck(generic_block::ctrlMtx); + generic_block::tempStop(); + generic_block::unregisterInput(_in); + _in = in; + generic_block::registerInput(_in); + generic_block::tempStart(); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + float level = 0; + for (int i = 0; i < count; i++) { + if (fabs(_in->readBuf[i]) > level) { level = fabs(_in->readBuf[i]); } + } + + volk_32f_s32f_multiply_32f(out.writeBuf, _in->readBuf, 1.0f / level, count); + + _in->flush(); + if (!out.swap(count)) { return -1; } + return count; + } + + stream out; + + private: + int count; + stream* _in; + + }; + + template class Volume : public generic_block> { public: @@ -365,4 +414,63 @@ namespace dsp { stream* _in; }; + + class Threshold : public generic_block { + public: + Threshold() {} + + Threshold(stream* in) { init(in); } + + ~Threshold() { + generic_block::stop(); + delete[] normBuffer; + } + + void init(stream* in) { + _in = in; + normBuffer = new float[STREAM_BUFFER_SIZE]; + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + void setInput(stream* in) { + std::lock_guard lck(generic_block::ctrlMtx); + generic_block::tempStop(); + generic_block::unregisterInput(_in); + _in = in; + generic_block::registerInput(_in); + generic_block::tempStart(); + } + + void setLevel(float level) { + _level = level; + } + + float getLevel() { + return _level; + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + for (int i = 0; i < count; i++) { + out.writeBuf[i] = (_in->readBuf[i] > 0.0f); + } + + _in->flush(); + if (!out.swap(count)) { return -1; } + return count; + } + + stream out; + + + private: + int count; + float* normBuffer; + float _level = -50.0f; + stream* _in; + + }; } \ No newline at end of file diff --git a/core/src/dsp/routing.h b/core/src/dsp/routing.h index a1e821b0..42bf4446 100644 --- a/core/src/dsp/routing.h +++ b/core/src/dsp/routing.h @@ -151,27 +151,29 @@ namespace dsp { } void bufferWorker() { - complex_t* buf = new complex_t[_keep]; + T* buf = new T[_keep]; bool delay = _skip < 0; int readCount = std::min(_keep + _skip, _keep); int skip = std::max(_skip, 0); - int delaySize = (-_skip) * sizeof(complex_t); + int delaySize = (-_skip) * sizeof(T); int delayCount = (-_skip); - complex_t* start = &buf[std::max(-_skip, 0)]; - complex_t* delayStart = &buf[_keep + _skip]; + T* start = &buf[std::max(-_skip, 0)]; + T* delayStart = &buf[_keep + _skip]; while (true) { if (delay) { memmove(buf, delayStart, delaySize); - for (int i = 0; i < delayCount; i++) { - buf[i].i /= 10.0f; - buf[i].q /= 10.0f; + if constexpr (std::is_same_v || std::is_same_v) { + for (int i = 0; i < delayCount; i++) { + buf[i].i /= 10.0f; + buf[i].q /= 10.0f; + } } } if (ringBuf.readAndSkip(start, readCount, skip) < 0) { break; }; - memcpy(out.writeBuf, buf, _keep * sizeof(complex_t)); + memcpy(out.writeBuf, buf, _keep * sizeof(T)); if (!out.swap(_keep)) { break; } } delete[] buf; diff --git a/core/src/dsp/source.h b/core/src/dsp/source.h index b973f7b6..c7ed9da3 100644 --- a/core/src/dsp/source.h +++ b/core/src/dsp/source.h @@ -71,4 +71,43 @@ namespace dsp { lv_32fc_t* zeroPhase; }; + + template + class HandlerSource : public generic_block> { + public: + HandlerSource() {} + + HandlerSource(int (*handler)(T* data, void* ctx), void* ctx) { init(handler, ctx); } + + ~HandlerSource() { generic_block>::stop(); } + + void init(int (*handler)(T* data, void* ctx), void* ctx) { + _handler = handler; + _ctx = ctx; + generic_block>::registerOutput(&out); + } + + void setHandler(int (*handler)(T* data, void* ctx), void* ctx) { + std::lock_guard lck(generic_block>::ctrlMtx); + generic_block>::tempStop(); + _handler = handler; + _ctx = ctx; + generic_block>::tempStart(); + } + + int run() { + int count = _handler(out.writeBuf, _ctx); + if (count < 0) { return -1; } + out.swap(count); + return count; + } + + stream out; + + private: + int count; + int (*_handler)(T* data, void* ctx); + void* _ctx; + + }; } \ No newline at end of file diff --git a/core/src/gui/widgets/symbol_diagram.cpp b/core/src/gui/widgets/symbol_diagram.cpp new file mode 100644 index 00000000..4898a4fd --- /dev/null +++ b/core/src/gui/widgets/symbol_diagram.cpp @@ -0,0 +1,42 @@ +#pragma once +#include + +namespace ImGui { + SymbolDiagram::SymbolDiagram() { + memset(buffer, 0, 1024 * sizeof(float)); + } + + void SymbolDiagram::draw(const ImVec2& size_arg) { + std::lock_guard lck(bufferMtx); + ImGuiWindow* window = GetCurrentWindow(); + ImGuiStyle& style = GetStyle(); + float pad = style.FramePadding.y; + ImVec2 min = window->DC.CursorPos; + ImVec2 size = CalcItemSize(size_arg, CalcItemWidth(), 100); + ImRect bb(min, ImVec2(min.x+size.x, min.y+size.y)); + float lineHeight = size.y; + + ItemSize(size, style.FramePadding.y); + if (!ItemAdd(bb, 0)) { + return; + } + + window->DrawList->AddRectFilled(min, ImVec2(min.x+size.x, min.y+size.y), IM_COL32(0,0,0,255)); + ImU32 col = ImGui::GetColorU32(ImGuiCol_CheckMark, 0.7f); + float increment = size.x / 1024.0f; + for (int i = 0; i < 1024; i++) { + if (buffer[i] > 1.0f || buffer[i] < -1.0f) { continue; } + window->DrawList->AddCircleFilled(ImVec2(((float)i * increment) + min.x, ((buffer[i] + 1) * (size.y*0.5f)) + min.y), 2, col); + } + } + + float* SymbolDiagram::aquireBuffer() { + bufferMtx.lock(); + return buffer; + } + + void SymbolDiagram::releaseBuffer() { + bufferMtx.unlock(); + } + +} \ No newline at end of file diff --git a/core/src/gui/widgets/symbol_diagram.h b/core/src/gui/widgets/symbol_diagram.h new file mode 100644 index 00000000..43c9fa38 --- /dev/null +++ b/core/src/gui/widgets/symbol_diagram.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include +#include +#include + +namespace ImGui { + class SymbolDiagram { + public: + SymbolDiagram(); + + void draw(const ImVec2& size_arg = ImVec2(0, 0)); + + float* aquireBuffer(); + + void releaseBuffer(); + + private: + std::mutex bufferMtx; + float buffer[1024]; + + }; +} \ No newline at end of file diff --git a/core/src/version.h b/core/src/version.h index 330e64fc..9d1afaf6 100644 --- a/core/src/version.h +++ b/core/src/version.h @@ -1,3 +1,3 @@ #pragma once -#define VERSION_STR "0.2.5_beta" \ No newline at end of file +#define VERSION_STR "0.3.0_beta" \ No newline at end of file diff --git a/falcon9_decoder/CMakeLists.txt b/falcon9_decoder/CMakeLists.txt new file mode 100644 index 00000000..90f2cf2d --- /dev/null +++ b/falcon9_decoder/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.13) +project(falcon9_decoder) + +if (MSVC) + set(CMAKE_CXX_FLAGS "-O2 /std:c++17 /EHsc") +else() + set(CMAKE_CXX_FLAGS "-O3 -std=c++17 -fpermissive") +endif (MSVC) + +file(GLOB_RECURSE SRC "src/*.cpp" "src/*.c") + +include_directories("src/") +include_directories("src/libcorrect/") + +add_library(falcon9_decoder SHARED ${SRC}) +target_link_libraries(falcon9_decoder PRIVATE sdrpp_core) +set_target_properties(falcon9_decoder PROPERTIES PREFIX "") + +# Install directives +install(TARGETS falcon9_decoder DESTINATION lib/sdrpp/plugins) \ No newline at end of file diff --git a/falcon9_decoder/src/falcon_fec.h b/falcon9_decoder/src/falcon_fec.h new file mode 100644 index 00000000..560895d6 --- /dev/null +++ b/falcon9_decoder/src/falcon_fec.h @@ -0,0 +1,127 @@ +#pragma once +#include +#include + +// WTF??? +extern "C" +{ +#include +} + +const uint8_t toDB[] = { +0x00, 0x7b, 0xaf, 0xd4, 0x99, 0xe2, 0x36, 0x4d, 0xfa, 0x81, 0x55, 0x2e, 0x63, 0x18, 0xcc, 0xb7, 0x86, 0xfd, 0x29, 0x52, 0x1f, + 0x64, 0xb0, 0xcb, 0x7c, 0x07, 0xd3, 0xa8, 0xe5, 0x9e, 0x4a, 0x31, 0xec, 0x97, 0x43, 0x38, 0x75, 0x0e, 0xda, 0xa1, 0x16, 0x6d, 0xb9, 0xc2, 0x8f, 0xf4, + 0x20, 0x5b, 0x6a, 0x11, 0xc5, 0xbe, 0xf3, 0x88, 0x5c, 0x27, 0x90, 0xeb, 0x3f, 0x44, 0x09, 0x72, 0xa6, 0xdd, 0xef, 0x94, 0x40, 0x3b, 0x76, 0x0d, 0xd9, + 0xa2, 0x15, 0x6e, 0xba, 0xc1, 0x8c, 0xf7, 0x23, 0x58, 0x69, 0x12, 0xc6, 0xbd, 0xf0, 0x8b, 0x5f, 0x24, 0x93, 0xe8, 0x3c, 0x47, 0x0a, 0x71, 0xa5, 0xde, + 0x03, 0x78, 0xac, 0xd7, 0x9a, 0xe1, 0x35, 0x4e, 0xf9, 0x82, 0x56, 0x2d, 0x60, 0x1b, 0xcf, 0xb4, 0x85, 0xfe, 0x2a, 0x51, 0x1c, 0x67, 0xb3, 0xc8, 0x7f, + 0x04, 0xd0, 0xab, 0xe6, 0x9d, 0x49, 0x32, 0x8d, 0xf6, 0x22, 0x59, 0x14, 0x6f, 0xbb, 0xc0, 0x77, 0x0c, 0xd8, 0xa3, 0xee, 0x95, 0x41, 0x3a, 0x0b, 0x70, + 0xa4, 0xdf, 0x92, 0xe9, 0x3d, 0x46, 0xf1, 0x8a, 0x5e, 0x25, 0x68, 0x13, 0xc7, 0xbc, 0x61, 0x1a, 0xce, 0xb5, 0xf8, 0x83, 0x57, 0x2c, 0x9b, 0xe0, 0x34, + 0x4f, 0x02, 0x79, 0xad, 0xd6, 0xe7, 0x9c, 0x48, 0x33, 0x7e, 0x05, 0xd1, 0xaa, 0x1d, 0x66, 0xb2, 0xc9, 0x84, 0xff, 0x2b, 0x50, 0x62, 0x19, 0xcd, 0xb6, + 0xfb, 0x80, 0x54, 0x2f, 0x98, 0xe3, 0x37, 0x4c, 0x01, 0x7a, 0xae, 0xd5, 0xe4, 0x9f, 0x4b, 0x30, 0x7d, 0x06, 0xd2, 0xa9, 0x1e, 0x65, 0xb1, 0xca, 0x87, + 0xfc, 0x28, 0x53, 0x8e, 0xf5, 0x21, 0x5a, 0x17, 0x6c, 0xb8, 0xc3, 0x74, 0x0f, 0xdb, 0xa0, 0xed, 0x96, 0x42, 0x39, 0x08, 0x73, 0xa7, 0xdc, 0x91, 0xea, + 0x3e, 0x45, 0xf2, 0x89, 0x5d, 0x26, 0x6b, 0x10, 0xc4, 0xbf +}; + +const uint8_t fromDB[] = { + 0x00, 0xcc, 0xac, 0x60, 0x79, 0xb5, 0xd5, 0x19, 0xf0, 0x3c, 0x5c, 0x90, 0x89, 0x45, 0x25, 0xe9, 0xfd, 0x31, 0x51, 0x9d, + 0x84, 0x48, 0x28, 0xe4, 0x0d, 0xc1, 0xa1, 0x6d, 0x74, 0xb8, 0xd8, 0x14, 0x2e, 0xe2, 0x82, 0x4e, 0x57, 0x9b, 0xfb, 0x37, 0xde, 0x12, 0x72, 0xbe, 0xa7, + 0x6b, 0x0b, 0xc7, 0xd3, 0x1f, 0x7f, 0xb3, 0xaa, 0x66, 0x06, 0xca, 0x23, 0xef, 0x8f, 0x43, 0x5a, 0x96, 0xf6, 0x3a, 0x42, 0x8e, 0xee, 0x22, 0x3b, 0xf7, + 0x97, 0x5b, 0xb2, 0x7e, 0x1e, 0xd2, 0xcb, 0x07, 0x67, 0xab, 0xbf, 0x73, 0x13, 0xdf, 0xc6, 0x0a, 0x6a, 0xa6, 0x4f, 0x83, 0xe3, 0x2f, 0x36, 0xfa, 0x9a, + 0x56, 0x6c, 0xa0, 0xc0, 0x0c, 0x15, 0xd9, 0xb9, 0x75, 0x9c, 0x50, 0x30, 0xfc, 0xe5, 0x29, 0x49, 0x85, 0x91, 0x5d, 0x3d, 0xf1, 0xe8, 0x24, 0x44, 0x88, + 0x61, 0xad, 0xcd, 0x01, 0x18, 0xd4, 0xb4, 0x78, 0xc5, 0x09, 0x69, 0xa5, 0xbc, 0x70, 0x10, 0xdc, 0x35, 0xf9, 0x99, 0x55, 0x4c, 0x80, 0xe0, 0x2c, 0x38, + 0xf4, 0x94, 0x58, 0x41, 0x8d, 0xed, 0x21, 0xc8, 0x04, 0x64, 0xa8, 0xb1, 0x7d, 0x1d, 0xd1, 0xeb, 0x27, 0x47, 0x8b, 0x92, 0x5e, 0x3e, 0xf2, 0x1b, 0xd7, + 0xb7, 0x7b, 0x62, 0xae, 0xce, 0x02, 0x16, 0xda, 0xba, 0x76, 0x6f, 0xa3, 0xc3, 0x0f, 0xe6, 0x2a, 0x4a, 0x86, 0x9f, 0x53, 0x33, 0xff, 0x87, 0x4b, 0x2b, + 0xe7, 0xfe, 0x32, 0x52, 0x9e, 0x77, 0xbb, 0xdb, 0x17, 0x0e, 0xc2, 0xa2, 0x6e, 0x7a, 0xb6, 0xd6, 0x1a, 0x03, 0xcf, 0xaf, 0x63, 0x8a, 0x46, 0x26, 0xea, + 0xf3, 0x3f, 0x5f, 0x93, 0xa9, 0x65, 0x05, 0xc9, 0xd0, 0x1c, 0x7c, 0xb0, 0x59, 0x95, 0xf5, 0x39, 0x20, 0xec, 0x8c, 0x40, 0x54, 0x98, 0xf8, 0x34, 0x2d, + 0xe1, 0x81, 0x4d, 0xa4, 0x68, 0x08, 0xc4, 0xdd, 0x11, 0x71, 0xbd +}; + +const uint8_t randVals[] = { + 0xFF, 0x48, 0x0E, 0xC0, 0x9A, 0x0D, 0x70, 0xBC, 0x8E, 0x2C, 0x93, 0xAD, 0xA7, 0xB7, 0x46, 0xCE, + 0x5A, 0x97, 0x7D, 0xCC, 0x32, 0xA2, 0xBF, 0x3E, 0x0A, 0x10, 0xF1, 0x88, 0x94, 0xCD, 0xEA, 0xB1, + 0xFE, 0x90, 0x1D, 0x81, 0x34, 0x1A, 0xE1, 0x79, 0x1C, 0x59, 0x27, 0x5B, 0x4F, 0x6E, 0x8D, 0x9C, + 0xB5, 0x2E, 0xFB, 0x98, 0x65, 0x45, 0x7E, 0x7C, 0x14, 0x21, 0xE3, 0x11, 0x29, 0x9B, 0xD5, 0x63, + 0xFD, 0x20, 0x3B, 0x02, 0x68, 0x35, 0xC2, 0xF2, 0x38, 0xB2, 0x4E, 0xB6, 0x9E, 0xDD, 0x1B, 0x39, + 0x6A, 0x5D, 0xF7, 0x30, 0xCA, 0x8A, 0xFC, 0xF8, 0x28, 0x43, 0xC6, 0x22, 0x53, 0x37, 0xAA, 0xC7, + 0xFA, 0x40, 0x76, 0x04, 0xD0, 0x6B, 0x85, 0xE4, 0x71, 0x64, 0x9D, 0x6D, 0x3D, 0xBA, 0x36, 0x72, + 0xD4, 0xBB, 0xEE, 0x61, 0x95, 0x15, 0xF9, 0xF0, 0x50, 0x87, 0x8C, 0x44, 0xA6, 0x6F, 0x55, 0x8F, + 0xF4, 0x80, 0xEC, 0x09, 0xA0, 0xD7, 0x0B, 0xC8, 0xE2, 0xC9, 0x3A, 0xDA, 0x7B, 0x74, 0x6C, 0xE5, + 0xA9, 0x77, 0xDC, 0xC3, 0x2A, 0x2B, 0xF3, 0xE0, 0xA1, 0x0F, 0x18, 0x89, 0x4C, 0xDE, 0xAB, 0x1F, + 0xE9, 0x01, 0xD8, 0x13, 0x41, 0xAE, 0x17, 0x91, 0xC5, 0x92, 0x75, 0xB4, 0xF6, 0xE8, 0xD9, 0xCB, + 0x52, 0xEF, 0xB9, 0x86, 0x54, 0x57, 0xE7, 0xC1, 0x42, 0x1E, 0x31, 0x12, 0x99, 0xBD, 0x56, 0x3F, + 0xD2, 0x03, 0xB0, 0x26, 0x83, 0x5C, 0x2F, 0x23, 0x8B, 0x24, 0xEB, 0x69, 0xED, 0xD1, 0xB3, 0x96, + 0xA5, 0xDF, 0x73, 0x0C, 0xA8, 0xAF, 0xCF, 0x82, 0x84, 0x3C, 0x62, 0x25, 0x33, 0x7A, 0xAC, 0x7F, + 0xA4, 0x07, 0x60, 0x4D, 0x06, 0xB8, 0x5E, 0x47, 0x16, 0x49, 0xD6, 0xD3, 0xDB, 0xA3, 0x67, 0x2D, + 0x4B, 0xBE, 0xE6, 0x19, 0x51, 0x5F, 0x9F, 0x05, 0x08, 0x78, 0xC4, 0x4A, 0x66, 0xF5, 0x58 +}; + +namespace dsp { + class FalconRS : public generic_block { + public: + FalconRS() {} + + FalconRS(stream* in) { init(in); } + + ~FalconRS() { + generic_block::stop(); + } + + void init(stream* in) { + _in = in; + + for (int i = 0; i < 5; i++) { memset(buffers[i], 0, 255); } + for (int i = 0; i < 5; i++) { memset(outBuffers[i], 0, 255); } + rs = correct_reed_solomon_create(correct_rs_primitive_polynomial_ccsds, 120, 11, 16); + if (rs == NULL) { printf("Error creating the reed solomon decoder\n"); } + + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + uint8_t* data = _in->readBuf + 4; + + // Deinterleave + for (int i = 0; i < 255*5; i++) { + buffers[i%5][i/5] = fromDB[data[i]]; + } + + // Reed the solomon :weary: + int result = 0; + result = correct_reed_solomon_decode(rs, buffers[0], 255, outBuffers[0]); + if (result == -1) { _in->flush(); return count; } + result = correct_reed_solomon_decode(rs, buffers[1], 255, outBuffers[1]); + if (result == -1) { _in->flush(); return count; } + result = correct_reed_solomon_decode(rs, buffers[2], 255, outBuffers[2]); + if (result == -1) { _in->flush(); return count; } + result = correct_reed_solomon_decode(rs, buffers[3], 255, outBuffers[3]); + if (result == -1) { _in->flush(); return count; } + result = correct_reed_solomon_decode(rs, buffers[4], 255, outBuffers[4]); + if (result == -1) { _in->flush(); return count; } + + // Reinterleave + for (int i = 0; i < 255*5; i++) { + out.writeBuf[i] = toDB[outBuffers[i%5][i/5]] ^ randVals[i % 255]; + } + + out.swap(255*5); + + _in->flush(); + return count; + } + + stream out; + + private: + int count; + uint8_t buffers[5][255]; + uint8_t outBuffers[5][255]; + correct_reed_solomon* rs; + + stream* _in; + + }; +} \ No newline at end of file diff --git a/falcon9_decoder/src/falcon_packet.h b/falcon9_decoder/src/falcon_packet.h new file mode 100644 index 00000000..e1d7aca9 --- /dev/null +++ b/falcon9_decoder/src/falcon_packet.h @@ -0,0 +1,121 @@ +#pragma once +#include +#include + +namespace dsp { + struct FalconFrameHeader { + uint32_t counter; + uint16_t packet; + }; + + class FalconPacketSync : public generic_block { + public: + FalconPacketSync() {} + + FalconPacketSync(stream* in) { init(in); } + + ~FalconPacketSync() { + generic_block::stop(); + } + + void init(stream* in) { + _in = in; + + generic_block::registerInput(_in); + generic_block::registerOutput(&out); + } + + int run() { + count = _in->read(); + if (count < 0) { return -1; } + + // Parse frame header + FalconFrameHeader header; + header.packet = (_in->readBuf[3] | ((_in->readBuf[2] & 0b111) << 8)); + header.counter = ((_in->readBuf[2] >> 3) | (_in->readBuf[1] << 5) | ((_in->readBuf[0] & 0b111111) << 13)); + + // Pointer to the data aera of the frame + uint8_t* data = _in->readBuf + 4; + int dataLen = 1191; + + // If a frame was missed, cancel reading the current packet + if (lastCounter + 1 != header.counter) { + packetRead = -1; + } + lastCounter = header.counter; + + // If frame is just a continuation of a single packet, save it + // If we're not currently reading a packet + if (header.packet == 2047 && packetRead >= 0) { + memcpy(packet + packetRead, data, dataLen); + packetRead += dataLen; + _in->flush(); + printf("Wow, all data\n"); + return count; + } + else if (header.packet == 2047) { + printf("Wow, all data\n"); + _in->flush(); + return count; + } + + // Finish reading the last package and send it + if (packetRead >= 0) { + memcpy(packet + packetRead, data, header.packet); + memcpy(out.writeBuf, packet, packetRead + header.packet); + out.swap(packetRead + header.packet); + packetRead = -1; + } + + // Iterate through every packet of the frame + for (int i = header.packet; i < dataLen;) { + // First, check if we can read the header. If not, save and wait for next frame + if (dataLen - i < 4) { + packetRead = dataLen - i; + memcpy(packet, &data[i], packetRead); + break; + } + + // Extract packet length + uint16_t length = (((data[i] & 0b1111) << 8) | data[i + 1]) + 2; + + // Check if it's not an invalid zero length packet + if (length <= 2) { + packetRead = -1; + break; + } + + uint64_t pktId = ((uint64_t)data[i + 2] << 56) | ((uint64_t)data[i + 3] << 48) | ((uint64_t)data[i + 4] << 40) | ((uint64_t)data[i + 5] << 32) + | ((uint64_t)data[i + 6] << 24) | ((uint64_t)data[i + 7] << 16) | ((uint64_t)data[i + 8] << 8) | data[i + 9]; + + // If the packet doesn't fit the frame, save and go to next frame + if (dataLen - i < length) { + packetRead = dataLen - i; + memcpy(packet, &data[i], packetRead); + break; + } + + // Here, the package fits fully, read it and jump to the next + memcpy(out.writeBuf, &data[i], length); + out.swap(length); + i += length; + + } + + _in->flush(); + return count; + } + + stream out; + + private: + int count; + uint32_t lastCounter = 0; + + int packetRead = -1; + uint8_t packet[0x4008]; + + stream* _in; + + }; +} \ No newline at end of file diff --git a/falcon9_decoder/src/libcorrect/convolutional/CMakeLists.txt b/falcon9_decoder/src/libcorrect/convolutional/CMakeLists.txt new file mode 100644 index 00000000..43c4bd9d --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SRCFILES bit.c metric.c history_buffer.c error_buffer.c lookup.c convolutional.c encode.c decode.c) +add_library(correct-convolutional OBJECT ${SRCFILES}) +if(HAVE_SSE) + add_subdirectory(sse) +endif() diff --git a/falcon9_decoder/src/libcorrect/convolutional/bit.c b/falcon9_decoder/src/libcorrect/convolutional/bit.c new file mode 100644 index 00000000..9ac4e1ef --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/bit.c @@ -0,0 +1,232 @@ +#include "correct/convolutional/bit.h" + +bit_writer_t *bit_writer_create(uint8_t *bytes, size_t len) { + bit_writer_t *w = calloc(1, sizeof(bit_writer_t)); + + if (bytes) { + bit_writer_reconfigure(w, bytes, len); + } + + return w; +} + +void bit_writer_reconfigure(bit_writer_t *w, uint8_t *bytes, size_t len) { + w->bytes = bytes; + w->len = len; + + w->current_byte = 0; + w->current_byte_len = 0; + w->byte_index = 0; +} + +void bit_writer_destroy(bit_writer_t *w) { + free(w); +} + +void bit_writer_write(bit_writer_t *w, uint8_t val, unsigned int n) { + for (size_t j = 0; j < n; j++) { + bit_writer_write_1(w, val); + val >>= 1; + } +} + +void bit_writer_write_1(bit_writer_t *w, uint8_t val) { + w->current_byte |= val & 1; + w->current_byte_len++; + + if (w->current_byte_len == 8) { + // 8 bits in a byte -- move to the next byte + w->bytes[w->byte_index] = w->current_byte; + w->byte_index++; + w->current_byte_len = 0; + w->current_byte = 0; + } else { + w->current_byte <<= 1; + } +} + +void bit_writer_write_bitlist(bit_writer_t *w, uint8_t *l, size_t len) { + // first close the current byte + // we might have been given too few elements to do that. be careful. + size_t close_len = 8 - w->current_byte_len; + close_len = (close_len < len) ? close_len : len; + + uint16_t b = w->current_byte; + + for (ptrdiff_t i = 0; i < close_len; i++) { + b |= l[i]; + b <<= 1; + } + + + l += close_len; + len -= close_len; + + uint8_t *bytes = w->bytes; + size_t byte_index = w->byte_index; + + if (w->current_byte_len + close_len == 8) { + b >>= 1; + bytes[byte_index] = b; + byte_index++; + } else { + w->current_byte = b; + w->current_byte_len += close_len; + return; + } + + size_t full_bytes = len/8; + + for (size_t i = 0; i < full_bytes; i++) { + bytes[byte_index] = l[0] << 7 | l[1] << 6 | l[2] << 5 | + l[3] << 4 | l[4] << 3 | l[5] << 2 | + l[6] << 1 | l[7]; + byte_index += 1; + l += 8; + } + + len -= 8*full_bytes; + + b = 0; + for (ptrdiff_t i = 0; i < len; i++) { + b |= l[i]; + b <<= 1; + } + + w->current_byte = b; + w->byte_index = byte_index; + w->current_byte_len = len; +} + +void bit_writer_write_bitlist_reversed(bit_writer_t *w, uint8_t *l, size_t len) { + l = l + len - 1; + + uint8_t *bytes = w->bytes; + size_t byte_index = w->byte_index; + uint16_t b; + + if (w->current_byte_len != 0) { + size_t close_len = 8 - w->current_byte_len; + close_len = (close_len < len) ? close_len : len; + + b = w->current_byte; + + for (ptrdiff_t i = 0; i < close_len; i++) { + b |= *l; + b <<= 1; + l--; + } + + len -= close_len; + + if (w->current_byte_len + close_len == 8) { + b >>= 1; + bytes[byte_index] = b; + byte_index++; + } else { + w->current_byte = b; + w->current_byte_len += close_len; + return; + } + } + + size_t full_bytes = len/8; + + for (size_t i = 0; i < full_bytes; i++) { + bytes[byte_index] = l[0] << 7 | l[-1] << 6 | l[-2] << 5 | + l[-3] << 4 | l[-4] << 3 | l[-5] << 2 | + l[-6] << 1 | l[-7]; + byte_index += 1; + l -= 8; + } + + len -= 8*full_bytes; + + b = 0; + for (ptrdiff_t i = 0; i < len; i++) { + b |= *l; + b <<= 1; + l--; + } + + w->current_byte = (uint8_t)b; + w->byte_index = byte_index; + w->current_byte_len = len; +} + +void bit_writer_flush_byte(bit_writer_t *w) { + if (w->current_byte_len != 0) { + w->current_byte <<= (8 - w->current_byte_len); + w->bytes[w->byte_index] = w->current_byte; + w->byte_index++; + w->current_byte_len = 0; + } +} + +size_t bit_writer_length(bit_writer_t *w) { + return w->byte_index; +} + +uint8_t reverse_byte(uint8_t b) { + return (b & 0x80) >> 7 | (b & 0x40) >> 5 | (b & 0x20) >> 3 | + (b & 0x10) >> 1 | (b & 0x08) << 1 | (b & 0x04) << 3 | + (b & 0x02) << 5 | (b & 0x01) << 7; +} + +static uint8_t reverse_table[256]; + +void create_reverse_table() { + for (uint16_t i = 0; i < 256; i++) { + reverse_table[i] = reverse_byte(i); + } +} + +bit_reader_t *bit_reader_create(const uint8_t *bytes, size_t len) { + bit_reader_t *r = calloc(1, sizeof(bit_reader_t)); + + static bool reverse_table_created = false; + + if (!reverse_table_created) { + create_reverse_table(); + reverse_table_created = true; + } + + if (bytes) { + bit_reader_reconfigure(r, bytes, len); + } + + return r; +} + +void bit_reader_reconfigure(bit_reader_t *r, const uint8_t *bytes, size_t len) { + r->bytes = bytes; + r->len = len; + + r->current_byte_len = 8; + r->current_byte = bytes[0]; + r->byte_index = 0; +} + +void bit_reader_destroy(bit_reader_t *r) { + free(r); +} + +uint8_t bit_reader_read(bit_reader_t *r, unsigned int n) { + unsigned int read = 0; + unsigned int n_copy = n; + + if (r->current_byte_len < n) { + read = r->current_byte & ((1 << r->current_byte_len) - 1); + r->byte_index++; + r->current_byte = r->bytes[r->byte_index]; + n -= r->current_byte_len; + r->current_byte_len = 8; + read <<= n; + } + + uint8_t copy_mask = (1 << n) - 1; + copy_mask <<= (r->current_byte_len - n); + read |= (r->current_byte & copy_mask) >> (r->current_byte_len - n); + r->current_byte_len -= n; + return reverse_table[read] >> (8 - n_copy); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/convolutional.c b/falcon9_decoder/src/libcorrect/convolutional/convolutional.c new file mode 100644 index 00000000..910ed156 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/convolutional.c @@ -0,0 +1,59 @@ +#include "correct/convolutional/convolutional.h" + +// https://www.youtube.com/watch?v=b3_lVSrPB6w + +correct_convolutional *_correct_convolutional_init(correct_convolutional *conv, + size_t rate, size_t order, + const polynomial_t *poly) { + if (order > 8 * sizeof(shift_register_t)) { + // XXX turn this into an error code + // printf("order must be smaller than 8 * sizeof(shift_register_t)\n"); + return NULL; + } + if (rate < 2) { + // XXX turn this into an error code + // printf("rate must be 2 or greater\n"); + return NULL; + } + + conv->order = order; + conv->rate = rate; + conv->numstates = 1 << order; + + unsigned int *table = malloc(sizeof(unsigned int) * (1 << order)); + fill_table(conv->rate, conv->order, poly, table); + *(unsigned int **)&conv->table = table; + + conv->bit_writer = bit_writer_create(NULL, 0); + conv->bit_reader = bit_reader_create(NULL, 0); + + conv->has_init_decode = false; + return conv; +} + +correct_convolutional *correct_convolutional_create(size_t rate, size_t order, + const polynomial_t *poly) { + correct_convolutional *conv = malloc(sizeof(correct_convolutional)); + correct_convolutional *init_conv = _correct_convolutional_init(conv, rate, order, poly); + if (!init_conv) { + free(conv); + } + return init_conv; +} + +void _correct_convolutional_teardown(correct_convolutional *conv) { + free(*(unsigned int **)&conv->table); + bit_writer_destroy(conv->bit_writer); + bit_reader_destroy(conv->bit_reader); + if (conv->has_init_decode) { + pair_lookup_destroy(conv->pair_lookup); + history_buffer_destroy(conv->history_buffer); + error_buffer_destroy(conv->errors); + free(conv->distances); + } +} + +void correct_convolutional_destroy(correct_convolutional *conv) { + _correct_convolutional_teardown(conv); + free(conv); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/decode.c b/falcon9_decoder/src/libcorrect/convolutional/decode.c new file mode 100644 index 00000000..e8891e04 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/decode.c @@ -0,0 +1,321 @@ +#include "correct/convolutional/convolutional.h" + +void conv_decode_print_iter(correct_convolutional *conv, unsigned int iter, + unsigned int winner_index) { + if (iter < 2220) { + return; + } + printf("iteration: %d\n", iter); + distance_t *errors = conv->errors->write_errors; + printf("errors:\n"); + for (shift_register_t i = 0; i < conv->numstates / 2; i++) { + printf("%2d: %d\n", i, errors[i]); + } + printf("\n"); + printf("history:\n"); + for (shift_register_t i = 0; i < conv->numstates / 2; i++) { + printf("%2d: ", i); + for (unsigned int j = 0; j <= winner_index; j++) { + printf("%d", conv->history_buffer->history[j][i] ? 1 : 0); + } + printf("\n"); + } + printf("\n"); +} + +void convolutional_decode_warmup(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft) { + // first phase: load shiftregister up from 0 (order goes from 1 to conv->order) + // we are building up error metrics for the first order bits + for (unsigned int i = 0; i < conv->order - 1 && i < sets; i++) { + // peel off rate bits from encoded to recover the same `out` as in the encoding process + // the difference being that this `out` will have the channel noise/errors applied + unsigned int out; + if (!soft) { + out = bit_reader_read(conv->bit_reader, conv->rate); + } + const distance_t *read_errors = conv->errors->read_errors; + distance_t *write_errors = conv->errors->write_errors; + // walk all of the state we have so far + for (size_t j = 0; j < (1 << (i + 1)); j += 1) { + unsigned int last = j >> 1; + distance_t dist; + if (soft) { + if (conv->soft_measurement == CORRECT_SOFT_LINEAR) { + dist = metric_soft_distance_linear(conv->table[j], soft + i * conv->rate, + conv->rate); + } else { + dist = metric_soft_distance_quadratic(conv->table[j], soft + i * conv->rate, + conv->rate); + } + } else { + dist = metric_distance(conv->table[j], out); + } + write_errors[j] = dist + read_errors[last]; + } + error_buffer_swap(conv->errors); + } +} + +void convolutional_decode_inner(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft) { + shift_register_t highbit = 1 << (conv->order - 1); + for (unsigned int i = conv->order - 1; i < (sets - conv->order + 1); i++) { + distance_t *distances = conv->distances; + // lasterrors are the aggregate bit errors for the states of shiftregister for the previous + // time slice + if (soft) { + if (conv->soft_measurement == CORRECT_SOFT_LINEAR) { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate); + } + } else { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate); + } + } + } else { + unsigned int out = bit_reader_read(conv->bit_reader, conv->rate); + for (unsigned int i = 0; i < 1 << (conv->rate); i++) { + distances[i] = metric_distance(i, out); + } + } + pair_lookup_t pair_lookup = conv->pair_lookup; + pair_lookup_fill_distance(pair_lookup, distances); + + // a mask to get the high order bit from the shift register + unsigned int num_iter = highbit << 1; + const distance_t *read_errors = conv->errors->read_errors; + // aggregate bit errors for this time slice + distance_t *write_errors = conv->errors->write_errors; + + uint8_t *history = history_buffer_get_slice(conv->history_buffer); + // walk through all states, ignoring oldest bit + // we will track a best register state (path) and the number of bit errors at that path at + // this time slice + // this loop considers two paths per iteration (high order bit set, clear) + // so, it only runs numstates/2 iterations + // we'll update the history for every state and find the path with the least aggregated bit + // errors + + // now run the main loop + // we calculate 2 sets of 2 register states here (4 states per iter) + // this creates 2 sets which share a predecessor, and 2 sets which share a successor + // + // the first set definition is the two states that are the same except for the least order + // bit + // these two share a predecessor because their high n - 1 bits are the same (differ only by + // newest bit) + // + // the second set definition is the two states that are the same except for the high order + // bit + // these two share a successor because the oldest high order bit will be shifted out, and + // the other bits will be present in the successor + // + shift_register_t highbase = highbit >> 1; + for (shift_register_t low = 0, high = highbit, base = 0; high < num_iter; + low += 8, high += 8, base += 4) { + // shifted-right ancestors + // low and low_plus_one share low_past_error + // note that they are the same when shifted right by 1 + // same goes for high and high_plus_one + for (shift_register_t offset = 0, base_offset = 0; base_offset < 4; + offset += 2, base_offset += 1) { + distance_pair_key_t low_key = pair_lookup.keys[base + base_offset]; + distance_pair_key_t high_key = pair_lookup.keys[highbase + base + base_offset]; + distance_pair_t low_concat_dist = pair_lookup.distances[low_key]; + distance_pair_t high_concat_dist = pair_lookup.distances[high_key]; + + distance_t low_past_error = read_errors[base + base_offset]; + distance_t high_past_error = read_errors[highbase + base + base_offset]; + + distance_t low_error = (low_concat_dist & 0xffff) + low_past_error; + distance_t high_error = (high_concat_dist & 0xffff) + high_past_error; + + shift_register_t successor = low + offset; + distance_t error; + uint8_t history_mask; + if (low_error <= high_error) { + error = low_error; + history_mask = 0; + } else { + error = high_error; + history_mask = 1; + } + write_errors[successor] = error; + history[successor] = history_mask; + + shift_register_t low_plus_one = low + offset + 1; + + distance_t low_plus_one_error = (low_concat_dist >> 16) + low_past_error; + distance_t high_plus_one_error = (high_concat_dist >> 16) + high_past_error; + + shift_register_t plus_one_successor = low_plus_one; + distance_t plus_one_error; + uint8_t plus_one_history_mask; + if (low_plus_one_error <= high_plus_one_error) { + plus_one_error = low_plus_one_error; + plus_one_history_mask = 0; + } else { + plus_one_error = high_plus_one_error; + plus_one_history_mask = 1; + } + write_errors[plus_one_successor] = plus_one_error; + history[plus_one_successor] = plus_one_history_mask; + } + } + + history_buffer_process(conv->history_buffer, write_errors, conv->bit_writer); + error_buffer_swap(conv->errors); + } +} + +void convolutional_decode_tail(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft) { + // flush state registers + // now we only shift in 0s, skipping 1-successors + shift_register_t highbit = 1 << (conv->order - 1); + for (unsigned int i = sets - conv->order + 1; i < sets; i++) { + // lasterrors are the aggregate bit errors for the states of shiftregister for the previous + // time slice + const distance_t *read_errors = conv->errors->read_errors; + // aggregate bit errors for this time slice + distance_t *write_errors = conv->errors->write_errors; + + uint8_t *history = history_buffer_get_slice(conv->history_buffer); + + // calculate the distance from all output states to our sliced bits + distance_t *distances = conv->distances; + if (soft) { + if (conv->soft_measurement == CORRECT_SOFT_LINEAR) { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate); + } + } else { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate); + } + } + } else { + unsigned int out = bit_reader_read(conv->bit_reader, conv->rate); + for (unsigned int i = 0; i < 1 << (conv->rate); i++) { + distances[i] = metric_distance(i, out); + } + } + const unsigned int *table = conv->table; + + // a mask to get the high order bit from the shift register + unsigned int num_iter = highbit << 1; + unsigned int skip = 1 << (conv->order - (sets - i)); + unsigned int base_skip = skip >> 1; + + shift_register_t highbase = highbit >> 1; + for (shift_register_t low = 0, high = highbit, base = 0; high < num_iter; + low += skip, high += skip, base += base_skip) { + unsigned int low_output = table[low]; + unsigned int high_output = table[high]; + distance_t low_dist = distances[low_output]; + distance_t high_dist = distances[high_output]; + + distance_t low_past_error = read_errors[base]; + distance_t high_past_error = read_errors[highbase + base]; + + distance_t low_error = low_dist + low_past_error; + distance_t high_error = high_dist + high_past_error; + + shift_register_t successor = low; + distance_t error; + uint8_t history_mask; + if (low_error < high_error) { + error = low_error; + history_mask = 0; + } else { + error = high_error; + history_mask = 1; + } + write_errors[successor] = error; + history[successor] = history_mask; + } + + history_buffer_process_skip(conv->history_buffer, write_errors, conv->bit_writer, skip); + error_buffer_swap(conv->errors); + } +} + +void _convolutional_decode_init(correct_convolutional *conv, unsigned int min_traceback, + unsigned int traceback_length, unsigned int renormalize_interval) { + conv->has_init_decode = true; + + conv->distances = calloc(1 << (conv->rate), sizeof(distance_t)); + conv->pair_lookup = pair_lookup_create(conv->rate, conv->order, conv->table); + + conv->soft_measurement = CORRECT_SOFT_LINEAR; + + // we limit history to go back as far as 5 * the order of our polynomial + conv->history_buffer = history_buffer_create(min_traceback, traceback_length, renormalize_interval, + conv->numstates / 2, 1 << (conv->order - 1)); + + conv->errors = error_buffer_create(conv->numstates); +} + +static ssize_t _convolutional_decode(correct_convolutional *conv, size_t num_encoded_bits, + size_t num_encoded_bytes, uint8_t *msg, + const soft_t *soft_encoded) { + if (!conv->has_init_decode) { + uint64_t max_error_per_input = conv->rate * soft_max; + unsigned int renormalize_interval = distance_max / max_error_per_input; + _convolutional_decode_init(conv, 5 * conv->order, 15 * conv->order, renormalize_interval); + } + + size_t sets = num_encoded_bits / conv->rate; + // XXX fix this vvvvvv + size_t decoded_len_bytes = num_encoded_bytes; + bit_writer_reconfigure(conv->bit_writer, msg, decoded_len_bytes); + + error_buffer_reset(conv->errors); + history_buffer_reset(conv->history_buffer); + + // no outputs are generated during warmup + convolutional_decode_warmup(conv, sets, soft_encoded); + convolutional_decode_inner(conv, sets, soft_encoded); + convolutional_decode_tail(conv, sets, soft_encoded); + + history_buffer_flush(conv->history_buffer, conv->bit_writer); + + return bit_writer_length(conv->bit_writer); +} + +// perform viterbi decoding +// hard decoder +ssize_t correct_convolutional_decode(correct_convolutional *conv, const uint8_t *encoded, + size_t num_encoded_bits, uint8_t *msg) { + if (num_encoded_bits % conv->rate) { + // XXX turn this into an error code + // printf("encoded length of message must be a multiple of rate\n"); + return -1; + } + + size_t num_encoded_bytes = + (num_encoded_bits % 8) ? (num_encoded_bits / 8 + 1) : (num_encoded_bits / 8); + bit_reader_reconfigure(conv->bit_reader, encoded, num_encoded_bytes); + + return _convolutional_decode(conv, num_encoded_bits, num_encoded_bytes, msg, NULL); +} + +ssize_t correct_convolutional_decode_soft(correct_convolutional *conv, const soft_t *encoded, + size_t num_encoded_bits, uint8_t *msg) { + if (num_encoded_bits % conv->rate) { + // XXX turn this into an error code + // printf("encoded length of message must be a multiple of rate\n"); + return -1; + } + + size_t num_encoded_bytes = + (num_encoded_bits % 8) ? (num_encoded_bits / 8 + 1) : (num_encoded_bits / 8); + + return _convolutional_decode(conv, num_encoded_bits, num_encoded_bytes, msg, encoded); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/encode.c b/falcon9_decoder/src/libcorrect/convolutional/encode.c new file mode 100644 index 00000000..5041262b --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/encode.c @@ -0,0 +1,61 @@ +#include "correct/convolutional/convolutional.h" + +size_t correct_convolutional_encode_len(correct_convolutional *conv, size_t msg_len) { + size_t msgbits = 8 * msg_len; + size_t encodedbits = conv->rate * (msgbits + conv->order + 1); + return encodedbits; +} + +// shift in most significant bit every time, one byte at a time +// shift register takes most recent bit on right, shifts left +// poly is written in same order, just & mask message w/ poly + +// assume that encoded length is long enough? +size_t correct_convolutional_encode(correct_convolutional *conv, + const uint8_t *msg, + size_t msg_len, + uint8_t *encoded) { + // convolutional code convolves filter coefficients, given by + // the polynomial, with some history from our message. + // the history is stored as single subsequent bits in shiftregister + shift_register_t shiftregister = 0; + + // shiftmask is the shiftregister bit mask that removes bits + // that extend beyond order + // e.g. if order is 7, then remove the 8th bit and beyond + unsigned int shiftmask = (1 << conv->order) - 1; + + size_t encoded_len_bits = correct_convolutional_encode_len(conv, msg_len); + size_t encoded_len = (encoded_len_bits % 8) ? (encoded_len_bits / 8 + 1) : (encoded_len_bits / 8); + bit_writer_reconfigure(conv->bit_writer, encoded, encoded_len); + + bit_reader_reconfigure(conv->bit_reader, msg, msg_len); + + for (size_t i = 0; i < 8 * msg_len; i++) { + // shiftregister has oldest bits on left, newest on right + shiftregister <<= 1; + shiftregister |= bit_reader_read(conv->bit_reader, 1); + shiftregister &= shiftmask; + // shift most significant bit from byte and move down one bit at a time + + // we do direct lookup of our convolutional output here + // all of the bits from this convolution are stored in this row + unsigned int out = conv->table[shiftregister]; + bit_writer_write(conv->bit_writer, out, conv->rate); + } + + // now flush the shiftregister + // this is simply running the loop as above but without any new inputs + // or rather, the new input string is all 0s + for (size_t i = 0; i < conv->order + 1; i++) { + shiftregister <<= 1; + shiftregister &= shiftmask; + unsigned int out = conv->table[shiftregister]; + bit_writer_write(conv->bit_writer, out, conv->rate); + } + + // 0-fill any remaining bits on our final byte + bit_writer_flush_byte(conv->bit_writer); + + return encoded_len_bits; +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/error_buffer.c b/falcon9_decoder/src/libcorrect/convolutional/error_buffer.c new file mode 100644 index 00000000..a5fc0ab5 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/error_buffer.c @@ -0,0 +1,43 @@ +#include "correct/convolutional/error_buffer.h" + +error_buffer_t *error_buffer_create(unsigned int num_states) { + error_buffer_t *buf = calloc(1, sizeof(error_buffer_t)); + + // how large are the error buffers? + buf->num_states = num_states; + + // save two error metrics, one for last round and one for this + // (double buffer) + // the error metric is the aggregated number of bit errors found + // at a given path which terminates at a particular shift register state + buf->errors[0] = calloc(sizeof(distance_t), num_states); + buf->errors[1] = calloc(sizeof(distance_t), num_states); + + // which buffer are we using, 0 or 1? + buf->index = 0; + + buf->read_errors = buf->errors[0]; + buf->write_errors = buf->errors[1]; + + return buf; +} + +void error_buffer_destroy(error_buffer_t *buf) { + free(buf->errors[0]); + free(buf->errors[1]); + free(buf); +} + +void error_buffer_reset(error_buffer_t *buf) { + memset(buf->errors[0], 0, buf->num_states * sizeof(distance_t)); + memset(buf->errors[1], 0, buf->num_states * sizeof(distance_t)); + buf->index = 0; + buf->read_errors = buf->errors[0]; + buf->write_errors = buf->errors[1]; +} + +void error_buffer_swap(error_buffer_t *buf) { + buf->read_errors = buf->errors[buf->index]; + buf->index = (buf->index + 1) % 2; + buf->write_errors = buf->errors[buf->index]; +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/history_buffer.c b/falcon9_decoder/src/libcorrect/convolutional/history_buffer.c new file mode 100644 index 00000000..f54ffdd9 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/history_buffer.c @@ -0,0 +1,158 @@ +#include "correct/convolutional/history_buffer.h" + +history_buffer *history_buffer_create(unsigned int min_traceback_length, + unsigned int traceback_group_length, + unsigned int renormalize_interval, unsigned int num_states, + shift_register_t highbit) { + history_buffer *buf = calloc(1, sizeof(history_buffer)); + + *(unsigned int *)&buf->min_traceback_length = min_traceback_length; + *(unsigned int *)&buf->traceback_group_length = traceback_group_length; + *(unsigned int *)&buf->cap = min_traceback_length + traceback_group_length; + *(unsigned int *)&buf->num_states = num_states; + *(shift_register_t *)&buf->highbit = highbit; + + buf->history = malloc(buf->cap * sizeof(uint8_t *)); + for (unsigned int i = 0; i < buf->cap; i++) { + buf->history[i] = calloc(num_states, sizeof(uint8_t)); + } + buf->fetched = malloc(buf->cap * sizeof(uint8_t)); + + buf->index = 0; + buf->len = 0; + + buf->renormalize_counter = 0; + buf->renormalize_interval = renormalize_interval; + + return buf; +} + +void history_buffer_destroy(history_buffer *buf) { + for (unsigned int i = 0; i < buf->cap; i++) { + free(buf->history[i]); + } + free(buf->history); + free(buf->fetched); + free(buf); +} + +void history_buffer_reset(history_buffer *buf) { + buf->len = 0; + buf->index = 0; +} + +uint8_t *history_buffer_get_slice(history_buffer *buf) { return buf->history[buf->index]; } + +shift_register_t history_buffer_search(history_buffer *buf, const distance_t *distances, + unsigned int search_every) { + shift_register_t bestpath; + distance_t leasterror = USHRT_MAX; + // search for a state with the least error + for (shift_register_t state = 0; state < buf->num_states; state += search_every) { + if (distances[state] < leasterror) { + leasterror = distances[state]; + bestpath = state; + } + } + return bestpath; +} + +void history_buffer_renormalize(history_buffer *buf, distance_t *distances, + shift_register_t min_register) { + distance_t min_distance = distances[min_register]; + for (shift_register_t i = 0; i < buf->num_states; i++) { + distances[i] -= min_distance; + } +} + +void history_buffer_traceback(history_buffer *buf, shift_register_t bestpath, + unsigned int min_traceback_length, bit_writer_t *output) { + unsigned int fetched_index = 0; + shift_register_t highbit = buf->highbit; + unsigned int index = buf->index; + unsigned int cap = buf->cap; + for (unsigned int j = 0; j < min_traceback_length; j++) { + if (index == 0) { + index = cap - 1; + } else { + index--; + } + // we're walking backwards from what the work we did before + // so, we'll shift high order bits in + // the path will cross multiple different shift register states, and we determine + // which state by going backwards one time slice at a time + uint8_t history = buf->history[index][bestpath]; + shift_register_t pathbit = history ? highbit : 0; + bestpath |= pathbit; + bestpath >>= 1; + } + unsigned int prefetch_index = index; + if (prefetch_index == 0) { + prefetch_index = cap - 1; + } else { + prefetch_index--; + } + unsigned int len = buf->len; + for (unsigned int j = min_traceback_length; j < len; j++) { + index = prefetch_index; + if (prefetch_index == 0) { + prefetch_index = cap - 1; + } else { + prefetch_index--; + } + prefetch(buf->history[prefetch_index]); + // we're walking backwards from what the work we did before + // so, we'll shift high order bits in + // the path will cross multiple different shift register states, and we determine + // which state by going backwards one time slice at a time + uint8_t history = buf->history[index][bestpath]; + shift_register_t pathbit = history ? highbit : 0; + bestpath |= pathbit; + bestpath >>= 1; + buf->fetched[fetched_index] = (pathbit ? 1 : 0); + fetched_index++; + } + bit_writer_write_bitlist_reversed(output, buf->fetched, fetched_index); + buf->len -= fetched_index; +} + +void history_buffer_process_skip(history_buffer *buf, distance_t *distances, bit_writer_t *output, + unsigned int skip) { + buf->index++; + if (buf->index == buf->cap) { + buf->index = 0; + } + + buf->renormalize_counter++; + buf->len++; + + // there are four ways these branches can resolve + // a) we are neither renormalizing nor doing a traceback + // b) we are renormalizing but not doing a traceback + // c) we are renormalizing and doing a traceback + // d) we are not renormalizing but we are doing a traceback + // in case c, we want to save the effort of finding the bestpath + // since that's expensive + // so we have to check for that case after we renormalize + if (buf->renormalize_counter == buf->renormalize_interval) { + buf->renormalize_counter = 0; + shift_register_t bestpath = history_buffer_search(buf, distances, skip); + history_buffer_renormalize(buf, distances, bestpath); + if (buf->len == buf->cap) { + // reuse the bestpath found for renormalizing + history_buffer_traceback(buf, bestpath, buf->min_traceback_length, output); + } + } else if (buf->len == buf->cap) { + // not renormalizing, find the bestpath here + shift_register_t bestpath = history_buffer_search(buf, distances, skip); + history_buffer_traceback(buf, bestpath, buf->min_traceback_length, output); + } +} + +void history_buffer_process(history_buffer *buf, distance_t *distances, bit_writer_t *output) { + history_buffer_process_skip(buf, distances, output, 1); +} + +void history_buffer_flush(history_buffer *buf, bit_writer_t *output) { + history_buffer_traceback(buf, 0, 0, output); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/lookup.c b/falcon9_decoder/src/libcorrect/convolutional/lookup.c new file mode 100644 index 00000000..8c96aae6 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/lookup.c @@ -0,0 +1,74 @@ +#include "correct/convolutional/lookup.h" + +// table has numstates rows +// each row contains all of the polynomial output bits concatenated together +// e.g. for rate 2, we have 2 bits in each row +// the first poly gets the LEAST significant bit, last poly gets most significant +void fill_table(unsigned int rate, + unsigned int order, + const polynomial_t *poly, + unsigned int *table) { + for (shift_register_t i = 0; i < 1 << order; i++) { + unsigned int out = 0; + unsigned int mask = 1; + for (size_t j = 0; j < rate; j++) { + out |= (popcount(i & poly[j]) % 2) ? mask : 0; + mask <<= 1; + } + table[i] = out; + } +} + +pair_lookup_t pair_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table) { + pair_lookup_t pairs; + + pairs.keys = malloc(sizeof(unsigned int) * (1 << (order - 1))); + pairs.outputs = calloc((1 << (rate * 2)), sizeof(unsigned int)); + unsigned int *inv_outputs = calloc((1 << (rate * 2)), sizeof(unsigned int)); + unsigned int output_counter = 1; + // for every (even-numbered) shift register state, find the concatenated output of the state + // and the subsequent state that follows it (low bit set). then, check to see if this + // concatenated output has a unique key assigned to it already. if not, give it a key. + // if it does, retrieve the key. assign this key to the shift register state. + for (unsigned int i = 0; i < (1 << (order - 1)); i++) { + // first get the concatenated pair of outputs + unsigned int out = table[i * 2 + 1]; + out <<= rate; + out |= table[i * 2]; + + // does this concatenated output exist in the outputs table yet? + if (!inv_outputs[out]) { + // doesn't exist, allocate a new key + inv_outputs[out] = output_counter; + pairs.outputs[output_counter] = out; + output_counter++; + } + // set the opaque key for the ith shift register state to the concatenated output entry + pairs.keys[i] = inv_outputs[out]; + } + pairs.outputs_len = output_counter; + pairs.output_mask = (1 << (rate)) - 1; + pairs.output_width = rate; + pairs.distances = calloc(pairs.outputs_len, sizeof(distance_pair_t)); + free(inv_outputs); + return pairs; +} + +void pair_lookup_destroy(pair_lookup_t pairs) { + free(pairs.keys); + free(pairs.outputs); + free(pairs.distances); +} + +void pair_lookup_fill_distance(pair_lookup_t pairs, distance_t *distances) { + for (unsigned int i = 1; i < pairs.outputs_len; i += 1) { + output_pair_t concat_out = pairs.outputs[i]; + unsigned int i_0 = concat_out & pairs.output_mask; + concat_out >>= pairs.output_width; + unsigned int i_1 = concat_out; + + pairs.distances[i] = (distances[i_1] << 16) | distances[i_0]; + } +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/metric.c b/falcon9_decoder/src/libcorrect/convolutional/metric.c new file mode 100644 index 00000000..894db4d9 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/metric.c @@ -0,0 +1,17 @@ +#include "correct/convolutional/metric.h" + +// measure the square of the euclidean distance between x and y +// since euclidean dist is sqrt(a^2 + b^2 + ... + n^2), the square is just +// a^2 + b^2 + ... + n^2 +distance_t metric_soft_distance_quadratic(unsigned int hard_x, const uint8_t *soft_y, size_t len) { + distance_t dist = 0; + for (unsigned int i = 0; i < len; i++) { + // first, convert hard_x to a soft measurement (0 -> 0, 1 - > 255) + unsigned int soft_x = (hard_x & 1) ? 255 : 0; + hard_x >>= 1; + int d = soft_y[i] - soft_x; + dist += d*d; + } + return dist >> 3; +} + diff --git a/falcon9_decoder/src/libcorrect/convolutional/sse/CMakeLists.txt b/falcon9_decoder/src/libcorrect/convolutional/sse/CMakeLists.txt new file mode 100644 index 00000000..0d0ade9a --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/sse/CMakeLists.txt @@ -0,0 +1,2 @@ +set(SRCFILES lookup.c convolutional.c encode.c decode.c) +add_library(correct-convolutional-sse OBJECT ${SRCFILES}) diff --git a/falcon9_decoder/src/libcorrect/convolutional/sse/convolutional.c b/falcon9_decoder/src/libcorrect/convolutional/sse/convolutional.c new file mode 100644 index 00000000..484c5c6e --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/sse/convolutional.c @@ -0,0 +1,21 @@ +#include "correct/convolutional/sse/convolutional.h" + +correct_convolutional_sse *correct_convolutional_sse_create(size_t rate, + size_t order, + const polynomial_t *poly) { + correct_convolutional_sse *conv = malloc(sizeof(correct_convolutional_sse)); + correct_convolutional *init_conv = _correct_convolutional_init(&conv->base_conv, rate, order, poly); + if (!init_conv) { + free(conv); + conv = NULL; + } + return conv; +} + +void correct_convolutional_sse_destroy(correct_convolutional_sse *conv) { + if (conv->base_conv.has_init_decode) { + oct_lookup_destroy(conv->oct_lookup); + } + _correct_convolutional_teardown(&conv->base_conv); + free(conv); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/sse/decode.c b/falcon9_decoder/src/libcorrect/convolutional/sse/decode.c new file mode 100644 index 00000000..0f6bcf5b --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/sse/decode.c @@ -0,0 +1,319 @@ +#include "correct/convolutional/sse/convolutional.h" + +static void convolutional_sse_decode_inner(correct_convolutional_sse *sse_conv, unsigned int sets, + const uint8_t *soft) { + correct_convolutional *conv = &sse_conv->base_conv; + shift_register_t highbit = 1 << (conv->order - 1); + unsigned int hist_buf_index = conv->history_buffer->index; + unsigned int hist_buf_cap = conv->history_buffer->cap; + unsigned int hist_buf_len = conv->history_buffer->len; + unsigned int hist_buf_rn_int = conv->history_buffer->renormalize_interval; + unsigned int hist_buf_rn_cnt = conv->history_buffer->renormalize_counter; + for (unsigned int i = conv->order - 1; i < (sets - conv->order + 1); i++) { + distance_t *distances = conv->distances; + // lasterrors are the aggregate bit errors for the states of + // shiftregister for the previous time slice + if (soft) { + if (conv->soft_measurement == CORRECT_SOFT_LINEAR) { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_linear(j, soft + i * conv->rate, conv->rate); + } + } else { + for (unsigned int j = 0; j < 1 << (conv->rate); j++) { + distances[j] = + metric_soft_distance_quadratic(j, soft + i * conv->rate, conv->rate); + } + } + } else { + unsigned int out = bit_reader_read(conv->bit_reader, conv->rate); + for (unsigned int i = 0; i < 1 << (conv->rate); i++) { + distances[i] = metric_distance(i, out); + } + } + oct_lookup_t oct_lookup = sse_conv->oct_lookup; + oct_lookup_fill_distance(oct_lookup, distances); + + // a mask to get the high order bit from the shift register + unsigned int num_iter = highbit << 1; + const distance_t *read_errors = conv->errors->read_errors; + // aggregate bit errors for this time slice + distance_t *write_errors = conv->errors->write_errors; + + uint8_t *history = conv->history_buffer->history[hist_buf_index]; + ; + // walk through all states, ignoring oldest bit + // we will track a best register state (path) and the number of bit + // errors at that path at this time slice + // this loop considers two paths per iteration (high order bit set, + // clear) + // so, it only runs numstates/2 iterations + // we'll update the history for every state and find the path with the + // least aggregated bit errors + + // now run the main loop + // we calculate 2 sets of 2 register states here (4 states per iter) + // this creates 2 sets which share a predecessor, and 2 sets which share + // a successor + // + // the first set definition is the two states that are the same except + // for the least order bit + // these two share a predecessor because their high n - 1 bits are the + // same (differ only by newest bit) + // + // the second set definition is the two states that are the same except + // for the high order bit + // these two share a successor because the oldest high order bit will be + // shifted out, and the other bits will be present in the successor + // + shift_register_t highbase = highbit >> 1; + shift_register_t oct_highbase = highbase >> 2; + for (shift_register_t low = 0, high = highbit, base = 0, oct = 0; high < num_iter; + low += 32, high += 32, base += 16, oct += 4) { + // shifted-right ancestors + // low and low_plus_one share low_past_error + // note that they are the same when shifted right by 1 + // same goes for high and high_plus_one + __m128i past_shuffle_mask = + _mm_set_epi32(0x07060706, 0x05040504, 0x03020302, 0x01000100); + __m128i hist_mask = + _mm_set_epi32(0x80808080, 0x80808080, 0x0e0c0a09, 0x07050301); + + // the loop below calculates 64 register states per loop iteration + // it does this by packing the 128-bit xmm registers with 8, 16-bit + // distances + // 4 of these registers hold distances for convolutional shift + // register states with the high bit cleared + // and 4 hold distances for the corresponding shift register + // states with the high bit set + // since each xmm register holds 8 distances, this adds up to a + // total of 8 * 8 = 64 shift register states + for (shift_register_t offset = 0, base_offset = 0; base_offset < 16; + offset += 32, base_offset += 16) { + // load the past error for the register states with the high + // order bit cleared + __m128i low_past_error = + _mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset)); + __m128i low_past_error0 = + _mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 4)); + __m128i low_past_error1 = + _mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 8)); + __m128i low_past_error2 = + _mm_loadl_epi64((const __m128i *)(read_errors + base + base_offset + 12)); + + // shuffle the low past error + // register states that differ only by their low order bit share + // a past error + low_past_error = _mm_shuffle_epi8(low_past_error, past_shuffle_mask); + low_past_error0 = _mm_shuffle_epi8(low_past_error0, past_shuffle_mask); + low_past_error1 = _mm_shuffle_epi8(low_past_error1, past_shuffle_mask); + low_past_error2 = _mm_shuffle_epi8(low_past_error2, past_shuffle_mask); + + // repeat past error lookup for register states with high order + // bit set + __m128i high_past_error = + _mm_loadl_epi64((const __m128i *)(read_errors + highbase + base + base_offset)); + __m128i high_past_error0 = _mm_loadl_epi64( + (const __m128i *)(read_errors + highbase + base + base_offset + 4)); + __m128i high_past_error1 = _mm_loadl_epi64( + (const __m128i *)(read_errors + highbase + base + base_offset + 8)); + __m128i high_past_error2 = _mm_loadl_epi64( + (const __m128i *)(read_errors + highbase + base + base_offset + 12)); + + high_past_error = _mm_shuffle_epi8(high_past_error, past_shuffle_mask); + high_past_error0 = _mm_shuffle_epi8(high_past_error0, past_shuffle_mask); + high_past_error1 = _mm_shuffle_epi8(high_past_error1, past_shuffle_mask); + high_past_error2 = _mm_shuffle_epi8(high_past_error2, past_shuffle_mask); + + // __m128i this_shuffle_mask = (__m128i){0x80800100, 0x80800302, + // 0x80800504, 0x80800706}; + + // load the opaque oct distance table keys from out loop index + distance_oct_key_t low_key = oct_lookup.keys[oct + (base_offset / 4)]; + distance_oct_key_t low_key0 = oct_lookup.keys[oct + (base_offset / 4) + 1]; + distance_oct_key_t low_key1 = oct_lookup.keys[oct + (base_offset / 4) + 2]; + distance_oct_key_t low_key2 = oct_lookup.keys[oct + (base_offset / 4) + 3]; + + // load the distances for the register states with high order + // bit cleared + __m128i low_this_error = + _mm_load_si128((const __m128i *)(oct_lookup.distances + low_key)); + __m128i low_this_error0 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + low_key0)); + __m128i low_this_error1 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + low_key1)); + __m128i low_this_error2 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + low_key2)); + + // add the distance for this time slice to the past distances + __m128i low_error = _mm_add_epi16(low_past_error, low_this_error); + __m128i low_error0 = _mm_add_epi16(low_past_error0, low_this_error0); + __m128i low_error1 = _mm_add_epi16(low_past_error1, low_this_error1); + __m128i low_error2 = _mm_add_epi16(low_past_error2, low_this_error2); + + // repeat oct distance table lookup for registers with high + // order bit set + distance_oct_key_t high_key = + oct_lookup.keys[oct_highbase + oct + (base_offset / 4)]; + distance_oct_key_t high_key0 = + oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 1]; + distance_oct_key_t high_key1 = + oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 2]; + distance_oct_key_t high_key2 = + oct_lookup.keys[oct_highbase + oct + (base_offset / 4) + 3]; + + __m128i high_this_error = + _mm_load_si128((const __m128i *)(oct_lookup.distances + high_key)); + __m128i high_this_error0 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + high_key0)); + __m128i high_this_error1 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + high_key1)); + __m128i high_this_error2 = + _mm_load_si128((const __m128i *)(oct_lookup.distances + high_key2)); + + __m128i high_error = _mm_add_epi16(high_past_error, high_this_error); + __m128i high_error0 = _mm_add_epi16(high_past_error0, high_this_error0); + __m128i high_error1 = _mm_add_epi16(high_past_error1, high_this_error1); + __m128i high_error2 = _mm_add_epi16(high_past_error2, high_this_error2); + + // distances for this time slice calculated + + // find the least error between registers who differ only in + // their high order bit + __m128i min_error = _mm_min_epu16(low_error, high_error); + __m128i min_error0 = _mm_min_epu16(low_error0, high_error0); + __m128i min_error1 = _mm_min_epu16(low_error1, high_error1); + __m128i min_error2 = _mm_min_epu16(low_error2, high_error2); + + _mm_store_si128((__m128i *)(write_errors + low + offset), min_error); + _mm_store_si128((__m128i *)(write_errors + low + offset + 8), min_error0); + _mm_store_si128((__m128i *)(write_errors + low + offset + 16), min_error1); + _mm_store_si128((__m128i *)(write_errors + low + offset + 24), min_error2); + + // generate history bits as (low_error > least_error) + // this operation fills each element with all 1s if true and 0s + // if false + // in other words, we set the history bit to 1 if + // the register state with high order bit set was the least + // error + __m128i hist = _mm_cmpgt_epi16(low_error, min_error); + // pack the bits down from 16-bit wide to 8-bit wide to + // accomodate history table + hist = _mm_shuffle_epi8(hist, hist_mask); + + __m128i hist0 = _mm_cmpgt_epi16(low_error0, min_error0); + hist0 = _mm_shuffle_epi8(hist0, hist_mask); + + __m128i hist1 = _mm_cmpgt_epi16(low_error1, min_error1); + hist1 = _mm_shuffle_epi8(hist1, hist_mask); + + __m128i hist2 = _mm_cmpgt_epi16(low_error2, min_error2); + hist2 = _mm_shuffle_epi8(hist2, hist_mask); + + // write the least error so that the next time slice sees it as + // the past error + // store the history bits set by cmp and shuffle operations + _mm_storel_epi64((__m128i *)(history + low + offset), hist); + _mm_storel_epi64((__m128i *)(history + low + offset + 8), hist0); + _mm_storel_epi64((__m128i *)(history + low + offset + 16), hist1); + _mm_storel_epi64((__m128i *)(history + low + offset + 24), hist2); + } + } + + // bypass the call to history buffer + // we should really make that function inline and remove this below + if (hist_buf_len == hist_buf_cap - 1 || hist_buf_rn_cnt == hist_buf_rn_int - 1) { + // restore hist buffer state and invoke it + conv->history_buffer->len = hist_buf_len; + conv->history_buffer->index = hist_buf_index; + conv->history_buffer->renormalize_counter = hist_buf_rn_cnt; + history_buffer_process(conv->history_buffer, write_errors, conv->bit_writer); + // restore our local values + hist_buf_len = conv->history_buffer->len; + hist_buf_index = conv->history_buffer->index; + hist_buf_cap = conv->history_buffer->cap; + hist_buf_rn_cnt = conv->history_buffer->renormalize_counter; + } else { + hist_buf_len++; + hist_buf_index++; + if (hist_buf_index == hist_buf_cap) { + hist_buf_index = 0; + } + hist_buf_rn_cnt++; + } + error_buffer_swap(conv->errors); + } + conv->history_buffer->len = hist_buf_len; + conv->history_buffer->index = hist_buf_index; + conv->history_buffer->renormalize_counter = hist_buf_rn_cnt; +} + +static void _convolutional_sse_decode_init(correct_convolutional_sse *conv, + unsigned int min_traceback, + unsigned int traceback_length, + unsigned int renormalize_interval) { + _convolutional_decode_init(&conv->base_conv, min_traceback, traceback_length, + renormalize_interval); + conv->oct_lookup = + oct_lookup_create(conv->base_conv.rate, conv->base_conv.order, conv->base_conv.table); +} + +static ssize_t _convolutional_sse_decode(correct_convolutional_sse *sse_conv, + size_t num_encoded_bits, size_t num_encoded_bytes, + uint8_t *msg, const soft_t *soft_encoded) { + correct_convolutional *conv = &sse_conv->base_conv; + if (!conv->has_init_decode) { + uint64_t max_error_per_input = conv->rate * soft_max; + // sse implementation unfortunately uses signed math on our unsigned values + // reduces usable distance by /2 + unsigned int renormalize_interval = (distance_max / 2) / max_error_per_input; + _convolutional_sse_decode_init(sse_conv, 5 * conv->order, 100 * conv->order, + renormalize_interval); + } + + size_t sets = num_encoded_bits / conv->rate; + // XXX fix this vvvvvv + size_t decoded_len_bytes = num_encoded_bytes; + bit_writer_reconfigure(conv->bit_writer, msg, decoded_len_bytes); + + error_buffer_reset(conv->errors); + history_buffer_reset(conv->history_buffer); + + // no outputs are generated during warmup + convolutional_decode_warmup(conv, sets, soft_encoded); + convolutional_sse_decode_inner(sse_conv, sets, soft_encoded); + convolutional_decode_tail(conv, sets, soft_encoded); + + history_buffer_flush(conv->history_buffer, conv->bit_writer); + + return bit_writer_length(conv->bit_writer); +} + +ssize_t correct_convolutional_sse_decode(correct_convolutional_sse *conv, const uint8_t *encoded, + size_t num_encoded_bits, uint8_t *msg) { + if (num_encoded_bits % conv->base_conv.rate) { + // XXX turn this into an error code + // printf("encoded length of message must be a multiple of rate\n"); + return -1; + } + + size_t num_encoded_bytes = + (num_encoded_bits % 8) ? (num_encoded_bits / 8 + 1) : (num_encoded_bits / 8); + bit_reader_reconfigure(conv->base_conv.bit_reader, encoded, num_encoded_bytes); + + return _convolutional_sse_decode(conv, num_encoded_bits, num_encoded_bytes, msg, NULL); +} + +ssize_t correct_convolutional_sse_decode_soft(correct_convolutional_sse *conv, const soft_t *encoded, + size_t num_encoded_bits, uint8_t *msg) { + if (num_encoded_bits % conv->base_conv.rate) { + // XXX turn this into an error code + // printf("encoded length of message must be a multiple of rate\n"); + return -1; + } + + size_t num_encoded_bytes = + (num_encoded_bits % 8) ? (num_encoded_bits / 8 + 1) : (num_encoded_bits / 8); + + return _convolutional_sse_decode(conv, num_encoded_bits, num_encoded_bytes, msg, encoded); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/sse/encode.c b/falcon9_decoder/src/libcorrect/convolutional/sse/encode.c new file mode 100644 index 00000000..92ea10df --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/sse/encode.c @@ -0,0 +1,9 @@ +#include "correct/convolutional/sse/convolutional.h" + +size_t correct_convolutional_sse_encode_len(correct_convolutional_sse *conv, size_t msg_len) { + return correct_convolutional_encode_len(&conv->base_conv, msg_len); +} + +size_t correct_convolutional_sse_encode(correct_convolutional_sse *conv, const uint8_t *msg, size_t msg_len, uint8_t *encoded) { + return correct_convolutional_encode(&conv->base_conv, msg, msg_len, encoded); +} diff --git a/falcon9_decoder/src/libcorrect/convolutional/sse/lookup.c b/falcon9_decoder/src/libcorrect/convolutional/sse/lookup.c new file mode 100644 index 00000000..472dd8f9 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/convolutional/sse/lookup.c @@ -0,0 +1,183 @@ +#include "correct/convolutional/sse/lookup.h" + +quad_lookup_t quad_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table) { + quad_lookup_t quads; + + quads.keys = malloc(sizeof(unsigned int) * (1 << (order - 2))); + quads.outputs = calloc((1 << (rate * 4)), sizeof(unsigned int)); + unsigned int *inv_outputs = calloc((1 << (rate * 4)), sizeof(unsigned int)); + unsigned int output_counter = 1; + // for every (even-numbered) shift register state, find the concatenated output of the state + // and the subsequent state that follows it (low bit set). then, check to see if this + // concatenated output has a unique key assigned to it already. if not, give it a key. + // if it does, retrieve the key. assign this key to the shift register state. + for (unsigned int i = 0; i < (1 << (order - 2)); i++) { + // first get the concatenated quad of outputs + unsigned int out = table[i * 4 + 3]; + out <<= rate; + out |= table[i * 4 + 2]; + out <<= rate; + out |= table[i * 4 + 1]; + out <<= rate; + out |= table[i * 4]; + + // does this concatenated output exist in the outputs table yet? + if (!inv_outputs[out]) { + // doesn't exist, allocate a new key + inv_outputs[out] = output_counter; + quads.outputs[output_counter] = out; + output_counter++; + } + // set the opaque key for the ith shift register state to the concatenated output entry + quads.keys[i] = inv_outputs[out]; + } + quads.outputs_len = output_counter; + quads.output_mask = (1 << (rate)) - 1; + quads.output_width = rate; + quads.distances = calloc(quads.outputs_len, sizeof(distance_quad_t)); + free(inv_outputs); + return quads; +} + +void quad_lookup_destroy(quad_lookup_t quads) { + free(quads.keys); + free(quads.outputs); + free(quads.distances); +} + +void quad_lookup_fill_distance(quad_lookup_t quads, distance_t *distances) { + for (unsigned int i = 1; i < quads.outputs_len; i += 1) { + output_quad_t concat_out = quads.outputs[i]; + unsigned int i_0 = concat_out & quads.output_mask; + concat_out >>= quads.output_width; + unsigned int i_1 = concat_out & quads.output_mask; + concat_out >>= quads.output_width; + unsigned int i_2 = concat_out & quads.output_mask; + concat_out >>= quads.output_width; + unsigned int i_3 = concat_out; + + quads.distances[i] = ((uint64_t)distances[i_3] << 48) | ((uint64_t)distances[i_2] << 32) | (distances[i_1] << 16) | distances[i_0]; + } +} + +distance_oct_key_t oct_lookup_find_key(output_oct_t *outputs, output_oct_t out, size_t num_keys) { + for (size_t i = 1; i < num_keys; i++) { + if (outputs[i] == out) { + return i; + } + } + return 0; +} + +oct_lookup_t oct_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table) { + oct_lookup_t octs; + + octs.keys = malloc((1 << (order - 3)) * sizeof(distance_oct_key_t)); + octs.outputs = malloc(((output_oct_t)2 << rate) * sizeof(uint64_t)); + output_oct_t *short_outs = calloc(((output_oct_t)2 << rate), sizeof(output_oct_t)); + size_t outputs_len = 2 << rate; + unsigned int output_counter = 1; + // for every (even-numbered) shift register state, find the concatenated output of the state + // and the subsequent state that follows it (low bit set). then, check to see if this + // concatenated output has a unique key assigned to it already. if not, give it a key. + // if it does, retrieve the key. assign this key to the shift register state. + for (shift_register_t i = 0; i < (1 << (order - 3)); i++) { + // first get the concatenated oct of outputs + output_oct_t out = table[i * 8 + 7]; + out <<= rate; + out |= table[i * 8 + 6]; + out <<= rate; + out |= table[i * 8 + 5]; + out <<= rate; + out |= table[i * 8 + 4]; + out <<= rate; + out |= table[i * 8 + 3]; + out <<= rate; + out |= table[i * 8 + 2]; + out <<= rate; + out |= table[i * 8 + 1]; + out <<= rate; + out |= table[i * 8]; + + distance_oct_key_t key = oct_lookup_find_key(short_outs, out, output_counter); + // does this concatenated output exist in the outputs table yet? + if (!key) { + // doesn't exist, allocate a new key + // now build it in expanded form + output_oct_t expanded_out = table[i * 8 + 7]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 6]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 5]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 4]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 3]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 2]; + expanded_out <<= 8; + expanded_out |= table[i * 8 + 1]; + expanded_out <<= 8; + expanded_out |= table[i * 8]; + + if (output_counter == outputs_len) { + octs.outputs = realloc(octs.outputs, outputs_len * 2 * sizeof(output_oct_t)); + short_outs = realloc(short_outs, outputs_len * 2 * sizeof(output_oct_t)); + outputs_len *= 2; + } + short_outs[output_counter] = out; + octs.outputs[output_counter] = expanded_out; + key = output_counter; + output_counter++; + } + // set the opaque key for the ith shift register state to the concatenated output entry + // we multiply the key by 2 since the distances are strided by 2 + octs.keys[i] = key * 2; + } + free(short_outs); + octs.outputs_len = output_counter; + octs.output_mask = (1 << (rate)) - 1; + octs.output_width = rate; + octs.distances = malloc(octs.outputs_len * 2 * sizeof(uint64_t)); + return octs; +} + +void oct_lookup_destroy(oct_lookup_t octs) { + free(octs.keys); + free(octs.outputs); + free(octs.distances); +} + +// WIP: sse approach to filling the distance table +/* +void oct_lookup_fill_distance_sse(oct_lookup_t octs, distance_t *distances) { + distance_pair_t *distance_pair = (distance_pair_t*)octs.distances; + __v4si index_shuffle_mask = (__v4si){0xffffff00, 0xffffff01, 0xffffff02, 0xffffff03}; + __m256i dist_shuffle_mask = (__m256i){0x01000504, 0x09080d0c, 0xffffffff, 0xffffffff, + 0x01000504, 0x09080d0c, 0xffffffff, 0xffffffff}; + const int dist_permute_mask = 0x0c; + for (unsigned int i = 1; i < octs.outputs_len; i += 2) { + // big heaping todo vvv + // a) we want 16 bit distances GATHERed, not 32 bit + // b) we need to load 8 of those distances, not 4 + __v4si short_concat_index = _mm_loadl_epi64(octs.outputs + 2*i); + __v4si short_concat_index0 = _mm_loadl_epi64(octs.outputs + 2*i + 1); + __m256i concat_index = _mm256_cvtepu8_epi32(short_concat_index); + __m256i concat_index0 = _mm256_cvtepu8_epi32(short_concat_index0); + __m256i dist = _mm256_i32gather_epi32(distances, concat_index, sizeof(distance_t)); + __m256i dist0 = _mm256_i32gather_epi32(distances, concat_index0, sizeof(distance_t)); + dist = _mm256_shuffle_epi8(dist, dist_shuffle_mask); + dist0 = _mm256_shuffle_epi8(dist0, dist_shuffle_mask); + dist = __builtin_shufflevector(dist, dist, 0, 5, 0, 0); + dist0 = __builtin_shufflevector(dist0, dist0, 0, 5, 0, 0); + __v4si packed_dist = _mm256_castsi256_si128(dist); + _mm_store_si128(distance_pair + 8 * i, packed_dist); + __v4si packed_dist0 = _mm256_castsi256_si128(dist0); + _mm_store_si128(distance_pair + 8 * i + 4, packed_dist0); + } +} +*/ diff --git a/falcon9_decoder/src/libcorrect/correct-sse.h b/falcon9_decoder/src/libcorrect/correct-sse.h new file mode 100644 index 00000000..9372f195 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct-sse.h @@ -0,0 +1,30 @@ +#ifndef CORRECT_SSE_H +#define CORRECT_SSE_H +#include + +struct correct_convolutional_sse; +typedef struct correct_convolutional_sse correct_convolutional_sse; + +/* SSE versions of libcorrect's convolutional encoder/decoder. + * These instances should not be used with the non-sse functions, + * and non-sse instances should not be used with the sse functions. + */ + +correct_convolutional_sse *correct_convolutional_sse_create( + size_t rate, size_t order, const correct_convolutional_polynomial_t *poly); + +void correct_convolutional_sse_destroy(correct_convolutional_sse *conv); + +size_t correct_convolutional_sse_encode_len(correct_convolutional_sse *conv, size_t msg_len); + +size_t correct_convolutional_sse_encode(correct_convolutional_sse *conv, const uint8_t *msg, + size_t msg_len, uint8_t *encoded); + +ssize_t correct_convolutional_sse_decode(correct_convolutional_sse *conv, const uint8_t *encoded, + size_t num_encoded_bits, uint8_t *msg); + +ssize_t correct_convolutional_sse_decode_soft(correct_convolutional_sse *conv, + const correct_convolutional_soft_t *encoded, + size_t num_encoded_bits, uint8_t *msg); + +#endif diff --git a/falcon9_decoder/src/libcorrect/correct.h b/falcon9_decoder/src/libcorrect/correct.h new file mode 100644 index 00000000..12d817db --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct.h @@ -0,0 +1,277 @@ +#ifndef CORRECT_H +#define CORRECT_H +#include + +#ifndef _MSC_VER +#include +#else +#include +typedef ptrdiff_t ssize_t; +#endif + + + +// Convolutional Codes + +// Convolutional polynomials are 16 bits wide +typedef uint16_t correct_convolutional_polynomial_t; + +static const correct_convolutional_polynomial_t correct_conv_r12_6_polynomial[] = {073, 061}; +static const correct_convolutional_polynomial_t correct_conv_r12_7_polynomial[] = {0161, 0127}; +static const correct_convolutional_polynomial_t correct_conv_r12_8_polynomial[] = {0225, 0373}; +static const correct_convolutional_polynomial_t correct_conv_r12_9_polynomial[] = {0767, 0545}; +static const correct_convolutional_polynomial_t correct_conv_r13_6_polynomial[] = {053, 075, 047}; +static const correct_convolutional_polynomial_t correct_conv_r13_7_polynomial[] = {0137, 0153, + 0121}; +static const correct_convolutional_polynomial_t correct_conv_r13_8_polynomial[] = {0333, 0257, + 0351}; +static const correct_convolutional_polynomial_t correct_conv_r13_9_polynomial[] = {0417, 0627, + 0675}; + +typedef uint8_t correct_convolutional_soft_t; + +struct correct_convolutional; +typedef struct correct_convolutional correct_convolutional; + +/* correct_convolutional_create allocates and initializes an encoder/decoder for + * a convolutional code with the given parameters. This function expects that + * poly will contain inv_rate elements. E.g., to create a conv. code instance + * with rate 1/2, order 7 and polynomials 0161, 0127, call + * correct_convolutional_create(2, 7, []correct_convolutional_polynomial_t{0161, 0127}); + * + * If this call is successful, it returns a non-NULL pointer. + */ +correct_convolutional *correct_convolutional_create(size_t inv_rate, size_t order, + const correct_convolutional_polynomial_t *poly); + +/* correct_convolutional_destroy releases all resources associated + * with conv. This pointer should not be used for further calls + * after calling destroy. + */ +void correct_convolutional_destroy(correct_convolutional *conv); + +/* correct_convolutional_encode_len returns the number of *bits* + * in a msg_len of given size, in *bytes*. In order to convert + * this returned length to bytes, save the result of the length + * modulo 8. If it's nonzero, then the length in bytes is + * length/8 + 1. If it is zero, then the length is just + * length/8. + */ +size_t correct_convolutional_encode_len(correct_convolutional *conv, size_t msg_len); + +/* correct_convolutional_encode uses the given conv instance to + * encode a block of data and write it to encoded. The length of + * encoded must be long enough to hold the resulting encoded length, + * which can be calculated by calling correct_convolutional_encode_len. + * However, this length should first be converted to bytes, as that + * function returns the length in bits. + * + * This function returns the number of bits written to encoded. If + * this is not an exact multiple of 8, then it occupies an additional + * byte. + */ +size_t correct_convolutional_encode(correct_convolutional *conv, const uint8_t *msg, size_t msg_len, + uint8_t *encoded); + +/* correct_convolutional_decode uses the given conv instance to + * decode a block encoded by correct_convolutional_encode. This + * call can cope with some bits being corrupted. This function + * cannot detect if there are too many bits corrupted, however, + * and will still write a message even if it is not recovered + * correctly. It is up to the user to perform checksums or CRC + * in order to guarantee that the decoded message is intact. + * + * num_encoded_bits should contain the length of encoded in *bits*. + * This value need not be an exact multiple of 8. However, + * it must be a multiple of the inv_rate used to create + * the conv instance. + * + * This function writes the result to msg, which must be large + * enough to hold the decoded message. A good conservative size + * for this buffer is the number of encoded bits multiplied by the + * rate of the code, e.g. for a rate 1/2 code, divide by 2. This + * value should then be converted to bytes to find the correct + * length for msg. + * + * This function returns the number of bytes written to msg. If + * it fails, it returns -1. + */ +ssize_t correct_convolutional_decode(correct_convolutional *conv, const uint8_t *encoded, + size_t num_encoded_bits, uint8_t *msg); + +/* correct_convolutional_decode_soft uses the given conv instance + * to decode a block encoded by correct_convolutional_encode and + * then modulated/demodulated to 8-bit symbols. This function expects + * that 1 is mapped to 255 and 0 to 0. An erased symbol should be + * set to 128. The decoded message may contain errors. + * + * num_encoded_bits should contain the length of encoded in *bits*. + * This value need not be an exact multiple of 8. However, + * it must be a multiple of the inv_rate used to create + * the conv instance. + * + * This function writes the result to msg, which must be large + * enough to hold the decoded message. A good conservative size + * for this buffer is the number of encoded bits multiplied by the + * rate of the code, e.g. for a rate 1/2 code, divide by 2. This + * value should then be converted to bytes to find the correct + * length for msg. + * + * This function returns the number of bytes written to msg. If + * it fails, it returns -1. + */ +ssize_t correct_convolutional_decode_soft(correct_convolutional *conv, + const correct_convolutional_soft_t *encoded, + size_t num_encoded_bits, uint8_t *msg); + +// Reed-Solomon + +struct correct_reed_solomon; +typedef struct correct_reed_solomon correct_reed_solomon; + +static const uint16_t correct_rs_primitive_polynomial_8_4_3_2_0 = + 0x11d; // x^8 + x^4 + x^3 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_5_3_1_0 = + 0x12b; // x^8 + x^5 + x^3 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_5_3_2_0 = + 0x12d; // x^8 + x^5 + x^3 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_6_3_2_0 = + 0x14d; // x^8 + x^6 + x^3 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_6_4_3_2_1_0 = + 0x15f; // x^8 + x^6 + x^4 + x^3 + x^2 + x + 1; + +static const uint16_t correct_rs_primitive_polynomial_8_6_5_1_0 = + 0x163; // x^8 + x^6 + x^5 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_6_5_2_0 = + 0x165; // x^8 + x^6 + x^5 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_6_5_3_0 = + 0x169; // x^8 + x^6 + x^5 + x^3 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_6_5_4_0 = + 0x171; // x^8 + x^6 + x^5 + x^4 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_2_1_0 = + 0x187; // x^8 + x^7 + x^2 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_3_2_0 = + 0x18d; // x^8 + x^7 + x^3 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_5_3_0 = + 0x1a9; // x^8 + x^7 + x^5 + x^3 + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_6_1_0 = + 0x1c3; // x^8 + x^7 + x^6 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_6_3_2_1_0 = + 0x1cf; // x^8 + x^7 + x^6 + x^3 + x^2 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_6_5_2_1_0 = + 0x1e7; // x^8 + x^7 + x^6 + x^5 + x^2 + x + 1 + +static const uint16_t correct_rs_primitive_polynomial_8_7_6_5_4_2_0 = + 0x1f5; // x^8 + x^7 + x^6 + x^5 + x^4 + x^2 + 1 + +static const uint16_t correct_rs_primitive_polynomial_ccsds = + 0x187; // x^8 + x^7 + x^2 + x + 1 + +/* correct_reed_solomon_create allocates and initializes an + * encoder/decoder for a given reed solomon error correction + * code. The block size must be 255 bytes with 8-bit symbols. + * + * This block can repair corrupted bytes. It can handle as + * many as num_roots/2 bytes having corruption and still recover + * the encoded payload. However, using more num_roots + * adds more parity overhead and substantially increases + * the computational time for decoding. + * + * primitive_polynomial should be one of the given values in this + * file. Sane values for first_consecutive_root and + * generator_root_gap are 1 and 1. Not all combinations of + * values produce valid codes. + */ +correct_reed_solomon *correct_reed_solomon_create(uint16_t primitive_polynomial, + uint8_t first_consecutive_root, + uint8_t generator_root_gap, + size_t num_roots); + +/* correct_reed_solomon_encode uses the rs instance to encode + * parity information onto a block of data. msg_length should be + * no more than the payload size for one block e.g. no more + * than 223 for a (255, 223) code. Shorter blocks will be encoded + * with virtual padding where the padding is not emitted. + * + * encoded should be at least msg_length + parity length bytes long + * + * It is allowable for msg and encoded to be the same pointer. In + * that case, the parity bytes will be written after the msg bytes + * end. + * + * This function returns the number of bytes written to encoded. + */ +ssize_t correct_reed_solomon_encode(correct_reed_solomon *rs, const uint8_t *msg, size_t msg_length, + uint8_t *encoded); + +/* correct_reed_solomon_decode uses the rs instance to decode + * a payload from a block containing payload and parity bytes. + * This function can recover in spite of some bytes being corrupted. + * + * In most cases, if the block is too corrupted, this function + * will return -1 and not perform decoding. It is possible but + * unlikely that the payload written to msg will contain + * errors when this function returns a positive value. + * + * msg should be long enough to contain a decoded payload for + * this encoded block. + * + * This function returns a positive number of bytes written to msg + * if it has decoded or -1 if it has encountered an error. + */ +ssize_t correct_reed_solomon_decode(correct_reed_solomon *rs, const uint8_t *encoded, + size_t encoded_length, uint8_t *msg); + +/* correct_reed_solomon_decode_with_erasures uses the rs + * instance to decode a payload from a block containing payload + * and parity bytes. Additionally, the user can provide the + * indices of bytes which have been suspected to be corrupted. + * This erasure information is typically provided by a demodulating + * or receiving device. This function can recover with + * some additional errors on top of the erasures. + * + * In order to successfully decode, the quantity + * (num_erasures + 2*num_errors) must be less than + * num_roots. + * + * erasure_locations shold contain erasure_length items. + * erasure_length should not exceed the number of parity + * bytes encoded into this block. + * + * In most cases, if the block is too corrupted, this function + * will return -1 and not perform decoding. It is possible but + * unlikely that the payload written to msg will contain + * errors when this function returns a positive value. + * + * msg should be long enough to contain a decoded payload for + * this encoded block. + * + * This function returns a positive number of bytes written to msg + * if it has decoded or -1 if it has encountered an error. + */ +ssize_t correct_reed_solomon_decode_with_erasures(correct_reed_solomon *rs, const uint8_t *encoded, + size_t encoded_length, + const uint8_t *erasure_locations, + size_t erasure_length, uint8_t *msg); + +/* correct_reed_solomon_destroy releases the resources + * associated with rs. This pointer should not be + * used for any functions after this call. + */ +void correct_reed_solomon_destroy(correct_reed_solomon *rs); + +#endif + diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional.h b/falcon9_decoder/src/libcorrect/correct/convolutional.h new file mode 100644 index 00000000..06b17108 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional.h @@ -0,0 +1,28 @@ +#ifndef CORRECT_CONVOLUTIONAL +#define CORRECT_CONVOLUTIONAL +#include +#include +#include +#include +#include +#include +#include +#include + +#include "correct.h" +#include "correct/portable.h" + +typedef unsigned int shift_register_t; +typedef uint16_t polynomial_t; +typedef uint64_t path_t; +typedef uint8_t soft_t; +static const soft_t soft_max = UINT8_MAX; + +typedef uint16_t distance_t; +static const distance_t distance_max = UINT16_MAX; + +typedef enum { + CORRECT_SOFT_LINEAR, + CORRECT_SOFT_QUADRATIC, +} soft_measurement_t; +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/bit.h b/falcon9_decoder/src/libcorrect/correct/convolutional/bit.h new file mode 100644 index 00000000..64838986 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/bit.h @@ -0,0 +1,44 @@ +#ifndef CORRECT_CONVOLUTIONAL_BIT +#define CORRECT_CONVOLUTIONAL_BIT +#include "correct/convolutional.h" + +typedef struct { + uint8_t current_byte; + unsigned int current_byte_len; + uint8_t *bytes; + size_t byte_index; + size_t len; +} bit_writer_t; + +bit_writer_t *bit_writer_create(uint8_t *bytes, size_t len); + +void bit_writer_reconfigure(bit_writer_t *w, uint8_t *bytes, size_t len); + +void bit_writer_destroy(bit_writer_t *w); + +void bit_writer_write(bit_writer_t *w, uint8_t val, unsigned int n); + +void bit_writer_write_1(bit_writer_t *w, uint8_t val); + +void bit_writer_write_bitlist_reversed(bit_writer_t *w, uint8_t *l, size_t len); + +void bit_writer_flush_byte(bit_writer_t *w); + +size_t bit_writer_length(bit_writer_t *w); + +typedef struct { + uint8_t current_byte; + size_t byte_index; + size_t len; + size_t current_byte_len; + const uint8_t *bytes; +} bit_reader_t; + +bit_reader_t *bit_reader_create(const uint8_t *bytes, size_t len); + +void bit_reader_reconfigure(bit_reader_t *r, const uint8_t *bytes, size_t len); + +void bit_reader_destroy(bit_reader_t *r); + +uint8_t bit_reader_read(bit_reader_t *r, unsigned int n); +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/convolutional.h b/falcon9_decoder/src/libcorrect/correct/convolutional/convolutional.h new file mode 100644 index 00000000..b0f66f09 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/convolutional.h @@ -0,0 +1,40 @@ +#ifndef CORRECT_CONVOLUTIONAL_H +#define CORRECT_CONVOLUTIONAL_H +#include "correct/convolutional.h" +#include "correct/convolutional/bit.h" +#include "correct/convolutional/metric.h" +#include "correct/convolutional/lookup.h" +#include "correct/convolutional/history_buffer.h" +#include "correct/convolutional/error_buffer.h" + +struct correct_convolutional { + const unsigned int *table; // size 2**order + size_t rate; // e.g. 2, 3... + size_t order; // e.g. 7, 9... + unsigned int numstates; // 2**order + bit_writer_t *bit_writer; + bit_reader_t *bit_reader; + + bool has_init_decode; + distance_t *distances; + pair_lookup_t pair_lookup; + soft_measurement_t soft_measurement; + history_buffer *history_buffer; + error_buffer_t *errors; +}; + +correct_convolutional *_correct_convolutional_init(correct_convolutional *conv, + size_t rate, size_t order, + const polynomial_t *poly); +void _correct_convolutional_teardown(correct_convolutional *conv); + +// portable versions +void _convolutional_decode_init(correct_convolutional *conv, unsigned int min_traceback, unsigned int traceback_length, unsigned int renormalize_interval); +void convolutional_decode_warmup(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft); +void convolutional_decode_inner(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft); +void convolutional_decode_tail(correct_convolutional *conv, unsigned int sets, + const uint8_t *soft); +#endif + diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/error_buffer.h b/falcon9_decoder/src/libcorrect/correct/convolutional/error_buffer.h new file mode 100644 index 00000000..12e3f756 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/error_buffer.h @@ -0,0 +1,15 @@ +#include "correct/convolutional.h" + +typedef struct { + unsigned int index; + distance_t *errors[2]; + unsigned int num_states; + + const distance_t *read_errors; + distance_t *write_errors; +} error_buffer_t; + +error_buffer_t *error_buffer_create(unsigned int num_states); +void error_buffer_destroy(error_buffer_t *buf); +void error_buffer_reset(error_buffer_t *buf); +void error_buffer_swap(error_buffer_t *buf); diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/history_buffer.h b/falcon9_decoder/src/libcorrect/correct/convolutional/history_buffer.h new file mode 100644 index 00000000..ca01e21b --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/history_buffer.h @@ -0,0 +1,59 @@ +#include "correct/convolutional.h" +#include "correct/convolutional/bit.h" + +// ring buffer of path histories +// generates output bits after accumulating sufficient history +typedef struct { + // history entries must be at least this old to be decoded + const unsigned int min_traceback_length; + // we'll decode entries in bursts. this tells us the length of the burst + const unsigned int traceback_group_length; + // we will store a total of cap entries. equal to min_traceback_length + + // traceback_group_length + const unsigned int cap; + + // how many states in the shift register? this is one of the dimensions of + // history table + const unsigned int num_states; + // what's the high order bit of the shift register? + const shift_register_t highbit; + + // history is a compact history representation for every shift register + // state, + // one bit per time slice + uint8_t **history; + + // which slice are we writing next? + unsigned int index; + + // how many valid entries are there? + unsigned int len; + + // temporary store of fetched bits + uint8_t *fetched; + + // how often should we renormalize? + unsigned int renormalize_interval; + unsigned int renormalize_counter; +} history_buffer; + +history_buffer *history_buffer_create(unsigned int min_traceback_length, + unsigned int traceback_group_length, + unsigned int renormalize_interval, + unsigned int num_states, + shift_register_t highbit); +void history_buffer_destroy(history_buffer *buf); +void history_buffer_reset(history_buffer *buf); +void history_buffer_step(history_buffer *buf); +uint8_t *history_buffer_get_slice(history_buffer *buf); +shift_register_t history_buffer_search(history_buffer *buf, + const distance_t *distances, + unsigned int search_every); +void history_buffer_traceback(history_buffer *buf, shift_register_t bestpath, + unsigned int min_traceback_length, + bit_writer_t *output); +void history_buffer_process_skip(history_buffer *buf, distance_t *distances, + bit_writer_t *output, unsigned int skip); +void history_buffer_process(history_buffer *buf, distance_t *distances, + bit_writer_t *output); +void history_buffer_flush(history_buffer *buf, bit_writer_t *output); diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/lookup.h b/falcon9_decoder/src/libcorrect/correct/convolutional/lookup.h new file mode 100644 index 00000000..c2573897 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/lookup.h @@ -0,0 +1,27 @@ +#ifndef CORRECT_CONVOLUTIONAL_LOOKUP +#define CORRECT_CONVOLUTIONAL_LOOKUP +#include "correct/convolutional.h" + +typedef unsigned int distance_pair_key_t; +typedef uint32_t output_pair_t; +typedef uint32_t distance_pair_t; + +typedef struct { + distance_pair_key_t *keys; + output_pair_t *outputs; + output_pair_t output_mask; + unsigned int output_width; + size_t outputs_len; + distance_pair_t *distances; +} pair_lookup_t; + +void fill_table(unsigned int order, + unsigned int rate, + const polynomial_t *poly, + unsigned int *table); +pair_lookup_t pair_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table); +void pair_lookup_destroy(pair_lookup_t pairs); +void pair_lookup_fill_distance(pair_lookup_t pairs, distance_t *distances); +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/metric.h b/falcon9_decoder/src/libcorrect/correct/convolutional/metric.h new file mode 100644 index 00000000..5e1cd55a --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/metric.h @@ -0,0 +1,20 @@ +#include "correct/convolutional.h" + +// measure the hamming distance of two bit strings +// implemented as population count of x XOR y +static inline distance_t metric_distance(unsigned int x, unsigned int y) { + return popcount(x ^ y); +} + +static inline distance_t metric_soft_distance_linear(unsigned int hard_x, const uint8_t *soft_y, size_t len) { + distance_t dist = 0; + for (unsigned int i = 0; i < len; i++) { + unsigned int soft_x = ((int8_t)(0) - (hard_x & 1)) & 0xff; + hard_x >>= 1; + int d = soft_y[i] - soft_x; + dist += (d < 0) ? -d : d; + } + return dist; +} + +distance_t metric_soft_distance_quadratic(unsigned int hard_x, const uint8_t *soft_y, size_t len); diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/sse/convolutional.h b/falcon9_decoder/src/libcorrect/correct/convolutional/sse/convolutional.h new file mode 100644 index 00000000..bf4ad880 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/sse/convolutional.h @@ -0,0 +1,15 @@ +#include "correct/convolutional/convolutional.h" +#include "correct/convolutional/sse/lookup.h" +// BIG HEAPING TODO sort out the include mess +#include "correct-sse.h" +#ifdef _MSC_VER +#include +#else +#include +#endif + + +struct correct_convolutional_sse { + correct_convolutional base_conv; + oct_lookup_t oct_lookup; +}; diff --git a/falcon9_decoder/src/libcorrect/correct/convolutional/sse/lookup.h b/falcon9_decoder/src/libcorrect/correct/convolutional/sse/lookup.h new file mode 100644 index 00000000..53e6e8a5 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/convolutional/sse/lookup.h @@ -0,0 +1,65 @@ +#include "correct/convolutional/lookup.h" +#ifdef _MSC_VER +#include +#else +#include +#endif + +typedef unsigned int distance_quad_key_t; +typedef unsigned int output_quad_t; +typedef uint64_t distance_quad_t; + +typedef struct { + distance_quad_key_t *keys; + output_quad_t *outputs; + output_quad_t output_mask; + unsigned int output_width; + size_t outputs_len; + distance_quad_t *distances; +} quad_lookup_t; + +typedef uint16_t distance_oct_key_t; +typedef uint64_t output_oct_t; +typedef uint64_t distance_oct_t; + +typedef struct { + distance_oct_key_t *keys; + output_oct_t *outputs; + output_oct_t output_mask; + unsigned int output_width; + size_t outputs_len; + distance_oct_t *distances; +} oct_lookup_t; + +quad_lookup_t quad_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table); +void quad_lookup_destroy(quad_lookup_t quads); +void quad_lookup_fill_distance(quad_lookup_t quads, distance_t *distances); +distance_oct_key_t oct_lookup_find_key(output_oct_t *outputs, output_oct_t out, size_t num_keys); +oct_lookup_t oct_lookup_create(unsigned int rate, + unsigned int order, + const unsigned int *table); +void oct_lookup_destroy(oct_lookup_t octs); +static inline void oct_lookup_fill_distance(oct_lookup_t octs, distance_t *distances) { + distance_pair_t *pairs = (distance_pair_t*)octs.distances; + for (unsigned int i = 1; i < octs.outputs_len; i += 1) { + output_oct_t concat_out = octs.outputs[i]; + unsigned int i_0 = concat_out & 0xff; + unsigned int i_1 = (concat_out >> 8) & 0xff; + unsigned int i_2 = (concat_out >> 16) & 0xff; + unsigned int i_3 = (concat_out >> 24) & 0xff; + + pairs[i*4 + 1] = distances[i_3] << 16 | distances[i_2]; + pairs[i*4 + 0] = distances[i_1] << 16 | distances[i_0]; + + concat_out >>= 32; + unsigned int i_4 = concat_out & 0xff; + unsigned int i_5 = (concat_out >> 8) & 0xff; + unsigned int i_6 = (concat_out >> 16) & 0xff; + unsigned int i_7 = (concat_out >> 24) & 0xff; + + pairs[i*4 + 3] = distances[i_7] << 16 | distances[i_6]; + pairs[i*4 + 2] = distances[i_5] << 16 | distances[i_4]; + } +} diff --git a/falcon9_decoder/src/libcorrect/correct/portable.h b/falcon9_decoder/src/libcorrect/correct/portable.h new file mode 100644 index 00000000..0c62e9fb --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/portable.h @@ -0,0 +1,20 @@ +#ifdef __GNUC__ +#define HAVE_BUILTINS +#endif + + +#ifdef HAVE_BUILTINS +#define popcount __builtin_popcount +#define prefetch __builtin_prefetch +#else + +static inline int popcount(int x) { + /* taken from the helpful http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel */ + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + return ((x + (x >> 4) & 0x0f0f0f0f) * 0x01010101) >> 24; +} + +static inline void prefetch(void *x) {} + +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon.h new file mode 100644 index 00000000..94d3efbb --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon.h @@ -0,0 +1,76 @@ +#ifndef CORRECT_REED_SOLOMON +#define CORRECT_REED_SOLOMON +#include +#include +#include +#include +#include +#include + +#include "correct.h" +#include "correct/portable.h" + +// an element in GF(2^8) +typedef uint8_t field_element_t; + +// a power of the primitive element alpha +typedef uint8_t field_logarithm_t; + +// give us some bits of headroom to do arithmetic +// variables of this type aren't really in any proper space +typedef uint16_t field_operation_t; + +// generated by find_poly +typedef struct { + const field_element_t *exp; + const field_logarithm_t *log; +} field_t; + +typedef struct { + field_element_t *coeff; + unsigned int order; +} polynomial_t; + +struct correct_reed_solomon { + size_t block_length; + size_t message_length; + size_t min_distance; + + field_logarithm_t first_consecutive_root; + field_logarithm_t generator_root_gap; + + field_t field; + + polynomial_t generator; + field_element_t *generator_roots; + field_logarithm_t **generator_root_exp; + + polynomial_t encoded_polynomial; + polynomial_t encoded_remainder; + + field_element_t *syndromes; + field_element_t *modified_syndromes; + polynomial_t received_polynomial; + polynomial_t error_locator; + polynomial_t error_locator_log; + polynomial_t erasure_locator; + field_element_t *error_roots; + field_element_t *error_vals; + field_logarithm_t *error_locations; + + field_logarithm_t **element_exp; + + // scratch + // (do no allocations at steady state) + + // used during find_error_locator + polynomial_t last_error_locator; + + // used during error value search + polynomial_t error_evaluator; + polynomial_t error_locator_derivative; + polynomial_t init_from_roots_scratch[2]; + bool has_init_decode; + +}; +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon/decode.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon/decode.h new file mode 100644 index 00000000..db1e011c --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon/decode.h @@ -0,0 +1,3 @@ +#include "correct/reed-solomon.h" +#include "correct/reed-solomon/field.h" +#include "correct/reed-solomon/polynomial.h" diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon/encode.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon/encode.h new file mode 100644 index 00000000..db1e011c --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon/encode.h @@ -0,0 +1,3 @@ +#include "correct/reed-solomon.h" +#include "correct/reed-solomon/field.h" +#include "correct/reed-solomon/polynomial.h" diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon/field.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon/field.h new file mode 100644 index 00000000..ef656a59 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon/field.h @@ -0,0 +1,167 @@ +#ifndef CORRECT_REED_SOLOMON_FIELD +#define CORRECT_REED_SOLOMON_FIELD +#include "correct/reed-solomon.h" + +/* +field_t field_create(field_operation_t primitive_poly); +void field_destroy(field_t field); +field_element_t field_add(field_t field, field_element_t l, field_element_t r); +field_element_t field_sub(field_t field, field_element_t l, field_element_t r); +field_element_t field_sum(field_t field, field_element_t elem, unsigned int n); +field_element_t field_mul(field_t field, field_element_t l, field_element_t r); +field_element_t field_div(field_t field, field_element_t l, field_element_t r); +field_logarithm_t field_mul_log(field_t field, field_logarithm_t l, field_logarithm_t r); +field_logarithm_t field_div_log(field_t field, field_logarithm_t l, field_logarithm_t r); +field_element_t field_mul_log_element(field_t field, field_logarithm_t l, field_logarithm_t r); +field_element_t field_pow(field_t field, field_element_t elem, int pow); +*/ + +static inline field_element_t field_mul_log_element(field_t field, field_logarithm_t l, field_logarithm_t r) { + // like field_mul_log, but returns a field_element_t + // because we are doing lookup here, we can safely skip the wrapover check + field_operation_t res = (field_operation_t)l + (field_operation_t)r; + return field.exp[res]; +} + +static inline field_t field_create(field_operation_t primitive_poly) { + // in GF(2^8) + // log and exp + // bits are in GF(2), compute alpha^val in GF(2^8) + // exp should be of size 512 so that it can hold a "wraparound" which prevents some modulo ops + // log should be of size 256. no wraparound here, the indices into this table are field elements + field_element_t *exp = malloc(512 * sizeof(field_element_t)); + field_logarithm_t *log = malloc(256 * sizeof(field_logarithm_t)); + + // assume alpha is a primitive element, p(x) (primitive_poly) irreducible in GF(2^8) + // addition is xor + // subtraction is addition (also xor) + // e.g. x^5 + x^4 + x^4 + x^2 + 1 = x^5 + x^2 + 1 + // each row of exp contains the field element found by exponentiating + // alpha by the row index + // each row of log contains the coefficients of + // alpha^7 + alpha^6 + alpha^5 + alpha^4 + alpha^3 + alpha^2 + alpha + 1 + // as 8 bits packed into one byte + + field_operation_t element = 1; + exp[0] = (field_element_t)element; + log[0] = (field_logarithm_t)0; // really, it's undefined. we shouldn't ever access this + for (field_operation_t i = 1; i < 512; i++) { + element = element * 2; + element = (element > 255) ? (element ^ primitive_poly) : element; + exp[i] = (field_element_t)element; + if (i < 256) { + log[element] = (field_logarithm_t)i; + } + } + + field_t field; + *(field_element_t **)&field.exp = exp; + *(field_logarithm_t **)&field.log = log; + + return field; +} + +static inline void field_destroy(field_t field) { + free(*(field_element_t **)&field.exp); + free(*(field_element_t **)&field.log); +} + +static inline field_element_t field_add(field_t field, field_element_t l, field_element_t r) { + return l ^ r; +} + +static inline field_element_t field_sub(field_t field, field_element_t l, field_element_t r) { + return l ^ r; +} + +static inline field_element_t field_sum(field_t field, field_element_t elem, unsigned int n) { + // we'll do a closed-form expression of the sum, although we could also + // choose to call field_add n times + + // since the sum is actually the bytewise XOR operator, this suggests two + // kinds of values: n odd, and n even + + // if you sum once, you have coeff + // if you sum twice, you have coeff XOR coeff = 0 + // if you sum thrice, you are back at coeff + // an even number of XORs puts you at 0 + // an odd number of XORs puts you back at your value + + // so, just throw away all the even n + return (n % 2) ? elem : 0; +} + +static inline field_element_t field_mul(field_t field, field_element_t l, field_element_t r) { + if (l == 0 || r == 0) { + return 0; + } + // multiply two field elements by adding their logarithms. + // yep, get your slide rules out + field_operation_t res = (field_operation_t)field.log[l] + (field_operation_t)field.log[r]; + + // if coeff exceeds 255, we would normally have to wrap it back around + // alpha^255 = 1; alpha^256 = alpha^255 * alpha^1 = alpha^1 + // however, we've constructed exponentiation table so that + // we can just directly lookup this result + // the result must be clamped to [0, 511] + // the greatest we can see at this step is alpha^255 * alpha^255 + // = alpha^510 + return field.exp[res]; +} + +static inline field_element_t field_div(field_t field, field_element_t l, field_element_t r) { + if (l == 0) { + return 0; + } + + if (r == 0) { + // XXX ??? + return 0; + } + + // division as subtraction of logarithms + + // if rcoeff is larger, then log[l] - log[r] wraps under + // so, instead, always add 255. in some cases, we'll wrap over, but + // that's ok because the exp table runs up to 511. + field_operation_t res = (field_operation_t)255 + (field_operation_t)field.log[l] - (field_operation_t)field.log[r]; + return field.exp[res]; +} + +static inline field_logarithm_t field_mul_log(field_t field, field_logarithm_t l, field_logarithm_t r) { + // this function performs the equivalent of field_mul on two logarithms + // we save a little time by skipping the lookup step at the beginning + field_operation_t res = (field_operation_t)l + (field_operation_t)r; + + // because we arent using the table, the value we return must be a valid logarithm + // which we have decided must live in [0, 255] (they are 8-bit values) + // ensuring this makes it so that multiple muls will not reach past the end of the + // exp table whenever we finally convert back to an element + if (res > 255) { + return (field_logarithm_t)(res - 255); + } + return (field_logarithm_t)res; +} + +static inline field_logarithm_t field_div_log(field_t field, field_logarithm_t l, field_logarithm_t r) { + // like field_mul_log, this performs field_div without going through a field_element_t + field_operation_t res = (field_operation_t)255 + (field_operation_t)l - (field_operation_t)r; + if (res > 255) { + return (field_logarithm_t)(res - 255); + } + return (field_logarithm_t)res; +} + +static inline field_element_t field_pow(field_t field, field_element_t elem, int pow) { + // take the logarithm, multiply, and then "exponentiate" + // n.b. the exp table only considers powers of alpha, the primitive element + // but here we have an arbitrary coeff + field_logarithm_t log = field.log[elem]; + int res_log = log * pow; + int mod = res_log % 255; + if (mod < 0) { + mod += 255; + } + return field.exp[mod]; +} +#endif diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon/polynomial.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon/polynomial.h new file mode 100644 index 00000000..e084e11e --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon/polynomial.h @@ -0,0 +1,14 @@ +#include "correct/reed-solomon.h" +#include "correct/reed-solomon/field.h" + +polynomial_t polynomial_create(unsigned int order); +void polynomial_destroy(polynomial_t polynomial); +void polynomial_mul(field_t field, polynomial_t l, polynomial_t r, polynomial_t res); +void polynomial_mod(field_t field, polynomial_t dividend, polynomial_t divisor, polynomial_t mod); +void polynomial_formal_derivative(field_t field, polynomial_t poly, polynomial_t der); +field_element_t polynomial_eval(field_t field, polynomial_t poly, field_element_t val); +field_element_t polynomial_eval_lut(field_t field, polynomial_t poly, const field_logarithm_t *val_exp); +field_element_t polynomial_eval_log_lut(field_t field, polynomial_t poly_log, const field_logarithm_t *val_exp); +void polynomial_build_exp_lut(field_t field, field_element_t val, unsigned int order, field_logarithm_t *val_exp); +polynomial_t polynomial_init_from_roots(field_t field, unsigned int nroots, field_element_t *roots, polynomial_t poly, polynomial_t *scratch); +polynomial_t polynomial_create_from_roots(field_t field, unsigned int nroots, field_element_t *roots); diff --git a/falcon9_decoder/src/libcorrect/correct/reed-solomon/reed-solomon.h b/falcon9_decoder/src/libcorrect/correct/reed-solomon/reed-solomon.h new file mode 100644 index 00000000..db1e011c --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/reed-solomon/reed-solomon.h @@ -0,0 +1,3 @@ +#include "correct/reed-solomon.h" +#include "correct/reed-solomon/field.h" +#include "correct/reed-solomon/polynomial.h" diff --git a/falcon9_decoder/src/libcorrect/correct/util/error-sim-fec.h b/falcon9_decoder/src/libcorrect/correct/util/error-sim-fec.h new file mode 100644 index 00000000..b3deeb1d --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/util/error-sim-fec.h @@ -0,0 +1,8 @@ +#include "correct/util/error-sim.h" + +#include + +void conv_fec27_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +void conv_fec29_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +void conv_fec39_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +void conv_fec615_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); diff --git a/falcon9_decoder/src/libcorrect/correct/util/error-sim-shim.h b/falcon9_decoder/src/libcorrect/correct/util/error-sim-shim.h new file mode 100644 index 00000000..aed1fc14 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/util/error-sim-shim.h @@ -0,0 +1,7 @@ +#include "correct/util/error-sim.h" +#include "fec_shim.h" + +ssize_t conv_shim27_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +ssize_t conv_shim29_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +ssize_t conv_shim39_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); +ssize_t conv_shim615_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); diff --git a/falcon9_decoder/src/libcorrect/correct/util/error-sim-sse.h b/falcon9_decoder/src/libcorrect/correct/util/error-sim-sse.h new file mode 100644 index 00000000..2447854b --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/util/error-sim-sse.h @@ -0,0 +1,7 @@ +#include "correct/util/error-sim.h" + +#include "correct-sse.h" + +size_t conv_correct_sse_enclen(void *conv_v, size_t msg_len); +void conv_correct_sse_encode(void *conv_v, uint8_t *msg, size_t msg_len, uint8_t *encoded); +ssize_t conv_correct_sse_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); diff --git a/falcon9_decoder/src/libcorrect/correct/util/error-sim.h b/falcon9_decoder/src/libcorrect/correct/util/error-sim.h new file mode 100644 index 00000000..e75c5a1e --- /dev/null +++ b/falcon9_decoder/src/libcorrect/correct/util/error-sim.h @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include +#include + +#include "correct.h" +#include "correct/portable.h" + +size_t distance(uint8_t *a, uint8_t *b, size_t len); +void gaussian(double *res, size_t n_res, double sigma); + +void encode_bpsk(uint8_t *msg, double *voltages, size_t n_syms, double bpsk_voltage); +void byte2bit(uint8_t *bytes, uint8_t *bits, size_t n_bits); +void decode_bpsk(uint8_t *soft, uint8_t *msg, size_t n_syms); +void decode_bpsk_soft(double *voltages, uint8_t *soft, size_t n_syms, double bpsk_voltage); +double log2amp(double l); +double amp2log(double a); +double sigma_for_eb_n0(double eb_n0, double bpsk_bit_energy); +void build_white_noise(double *noise, size_t n_syms, double eb_n0, double bpsk_bit_energy); +void add_white_noise(double *signal, double *noise, size_t n_syms); + +typedef struct { + uint8_t *msg_out; + size_t msg_len; + uint8_t *encoded; + double *v; + double *corrupted; + uint8_t *soft; + double *noise; + size_t enclen; + size_t enclen_bytes; + void (*encode)(void *, uint8_t *msg, size_t msg_len, uint8_t *encoded); + void *encoder; + ssize_t (*decode)(void *, uint8_t *soft, size_t soft_len, uint8_t *msg); + void *decoder; +} conv_testbench; + +conv_testbench *resize_conv_testbench(conv_testbench *scratch, size_t (*enclen)(void *, size_t), void *enc, size_t msg_len); +void free_scratch(conv_testbench *scratch); +int test_conv_noise(conv_testbench *scratch, uint8_t *msg, size_t n_bytes, + double bpsk_voltage); + +size_t conv_correct_enclen(void *conv_v, size_t msg_len); +void conv_correct_encode(void *conv_v, uint8_t *msg, size_t msg_len, uint8_t *encoded); +ssize_t conv_correct_decode(void *conv_v, uint8_t *soft, size_t soft_len, uint8_t *msg); diff --git a/falcon9_decoder/src/libcorrect/fec_shim.c b/falcon9_decoder/src/libcorrect/fec_shim.c new file mode 100644 index 00000000..146c60ce --- /dev/null +++ b/falcon9_decoder/src/libcorrect/fec_shim.c @@ -0,0 +1,255 @@ +#include +#include + +#include "fec_shim.h" + +typedef struct { + correct_reed_solomon *rs; + unsigned int msg_length; + unsigned int block_length; + unsigned int num_roots; + uint8_t *msg_out; + unsigned int pad; + uint8_t *erasures; +} reed_solomon_shim; + +void *init_rs_char(int symbol_size, int primitive_polynomial, + int first_consecutive_root, int root_gap, int number_roots, + unsigned int pad) { + if (symbol_size != 8) { + return NULL; + } + + reed_solomon_shim *shim = malloc(sizeof(reed_solomon_shim)); + + shim->pad = pad; + shim->block_length = 255 - pad; + shim->num_roots = number_roots; + shim->msg_length = shim->block_length - number_roots; + shim->rs = correct_reed_solomon_create(primitive_polynomial, + first_consecutive_root, root_gap, number_roots); + shim->msg_out = malloc(shim->block_length); + shim->erasures = malloc(number_roots); + + return shim; +} + +void free_rs_char(void *rs) { + reed_solomon_shim *shim = (reed_solomon_shim *)rs; + correct_reed_solomon_destroy(shim->rs); + free(shim->msg_out); + free(shim->erasures); + free(shim); +} + +void encode_rs_char(void *rs, const unsigned char *msg, unsigned char *parity) { + reed_solomon_shim *shim = (reed_solomon_shim *)rs; + correct_reed_solomon_encode(shim->rs, msg, shim->msg_length, shim->msg_out); + memcpy(parity, shim->msg_out + shim->msg_length, shim->num_roots); +} + +void decode_rs_char(void *rs, unsigned char *block, int *erasure_locations, + int num_erasures) { + reed_solomon_shim *shim = (reed_solomon_shim *)rs; + for (int i = 0; i < num_erasures; i++) { + shim->erasures[i] = (uint8_t)(erasure_locations[i]) - shim->pad; + } + correct_reed_solomon_decode_with_erasures(shim->rs, block, shim->block_length, + shim->erasures, num_erasures, + block); +} + +typedef struct { + correct_convolutional *conv; + unsigned int rate; + unsigned int order; + uint8_t *buf; + size_t buf_len; + uint8_t *read_iter; + uint8_t *write_iter; +} convolutional_shim; + +static correct_convolutional_polynomial_t r12k7[] = {V27POLYA, V27POLYB}; + +static correct_convolutional_polynomial_t r12k9[] = {V29POLYA, V29POLYB}; + +static correct_convolutional_polynomial_t r13k9[] = {V39POLYA, V39POLYB, + V39POLYC}; + +static correct_convolutional_polynomial_t r16k15[] = { + V615POLYA, V615POLYB, V615POLYC, V615POLYD, V615POLYE, V615POLYF}; + +/* Common methods */ +static void *create_viterbi(unsigned int num_decoded_bits, unsigned int rate, + unsigned int order, + correct_convolutional_polynomial_t *poly) { + convolutional_shim *shim = malloc(sizeof(convolutional_shim)); + + size_t num_decoded_bytes = (num_decoded_bits % 8) + ? (num_decoded_bits / 8 + 1) + : num_decoded_bits / 8; + + shim->rate = rate; + shim->order = order; + shim->buf = malloc(num_decoded_bytes); + shim->buf_len = num_decoded_bytes; + shim->conv = correct_convolutional_create(rate, order, poly); + shim->read_iter = shim->buf; + shim->write_iter = shim->buf; + + return shim; +} + +static void delete_viterbi(void *vit) { + convolutional_shim *shim = (convolutional_shim *)vit; + free(shim->buf); + correct_convolutional_destroy(shim->conv); + free(shim); +} + +static void init_viterbi(void *vit) { + convolutional_shim *shim = (convolutional_shim *)vit; + shim->read_iter = shim->buf; + shim->write_iter = shim->buf; +} + +static void update_viterbi_blk(void *vit, const unsigned char *encoded_soft, + unsigned int num_encoded_groups) { + convolutional_shim *shim = (convolutional_shim *)vit; + + // don't overwrite our buffer + size_t rem = (shim->buf + shim->buf_len) - shim->write_iter; + size_t rem_bits = 8 * rem; + // this math isn't very clear + // here we sort of do the opposite of what liquid-dsp does + size_t n_write_bits = num_encoded_groups - (shim->order - 1); + if (n_write_bits > rem_bits) { + size_t reduction = n_write_bits - rem_bits; + num_encoded_groups -= reduction; + n_write_bits -= reduction; + } + + // what if n_write_bits isn't a multiple of 8? + // libcorrect can't start and stop at arbitrary indices... + correct_convolutional_decode_soft( + shim->conv, encoded_soft, num_encoded_groups * shim->rate, shim->write_iter); + shim->write_iter += n_write_bits / 8; +} + +static void chainback_viterbi(void *vit, unsigned char *decoded, + unsigned int num_decoded_bits) { + convolutional_shim *shim = (convolutional_shim *)vit; + + // num_decoded_bits not a multiple of 8? + // this is a similar problem to update_viterbi_blk + // although here we could actually resolve a non-multiple of 8 + size_t rem = shim->write_iter - shim->read_iter; + size_t rem_bits = 8 * rem; + + if (num_decoded_bits > rem_bits) { + num_decoded_bits = rem_bits; + } + + size_t num_decoded_bytes = (num_decoded_bits % 8) + ? (num_decoded_bits / 8 + 1) + : num_decoded_bits / 8; + memcpy(decoded, shim->read_iter, num_decoded_bytes); + + shim->read_iter += num_decoded_bytes; +} + +/* Rate 1/2, k = 7 */ +void *create_viterbi27(int num_decoded_bits) { + return create_viterbi(num_decoded_bits, 2, 7, r12k7); +} + +void delete_viterbi27(void *vit) { delete_viterbi(vit); } + +int init_viterbi27(void *vit, int _) { + init_viterbi(vit); + return 0; +} + +int update_viterbi27_blk(void *vit, unsigned char *encoded_soft, + int num_encoded_groups) { + update_viterbi_blk(vit, encoded_soft, num_encoded_groups); + return 0; +} + +int chainback_viterbi27(void *vit, unsigned char *decoded, + unsigned int num_decoded_bits, unsigned int _) { + chainback_viterbi(vit, decoded, num_decoded_bits); + return 0; +} + +/* Rate 1/2, k = 9 */ +void *create_viterbi29(int num_decoded_bits) { + return create_viterbi(num_decoded_bits, 2, 9, r12k9); +} + +void delete_viterbi29(void *vit) { delete_viterbi(vit); } + +int init_viterbi29(void *vit, int _) { + init_viterbi(vit); + return 0; +} + +int update_viterbi29_blk(void *vit, unsigned char *encoded_soft, + int num_encoded_groups) { + update_viterbi_blk(vit, encoded_soft, num_encoded_groups); + return 0; +} + +int chainback_viterbi29(void *vit, unsigned char *decoded, + unsigned int num_decoded_bits, unsigned int _) { + chainback_viterbi(vit, decoded, num_decoded_bits); + return 0; +} + +/* Rate 1/3, k = 9 */ +void *create_viterbi39(int num_decoded_bits) { + return create_viterbi(num_decoded_bits, 3, 9, r13k9); +} + +void delete_viterbi39(void *vit) { delete_viterbi(vit); } + +int init_viterbi39(void *vit, int _) { + init_viterbi(vit); + return 0; +} + +int update_viterbi39_blk(void *vit, unsigned char *encoded_soft, + int num_encoded_groups) { + update_viterbi_blk(vit, encoded_soft, num_encoded_groups); + return 0; +} + +int chainback_viterbi39(void *vit, unsigned char *decoded, + unsigned int num_decoded_bits, unsigned int _) { + chainback_viterbi(vit, decoded, num_decoded_bits); + return 0; +} + +/* Rate 1/6, k = 15 */ +void *create_viterbi615(int num_decoded_bits) { + return create_viterbi(num_decoded_bits, 6, 15, r16k15); +} + +void delete_viterbi615(void *vit) { delete_viterbi(vit); } + +int init_viterbi615(void *vit, int _) { + init_viterbi(vit); + return 0; +} + +int update_viterbi615_blk(void *vit, unsigned char *encoded_soft, + int num_encoded_groups) { + update_viterbi_blk(vit, encoded_soft, num_encoded_groups); + return 0; +} + +int chainback_viterbi615(void *vit, unsigned char *decoded, + unsigned int num_decoded_bits, unsigned int _) { + chainback_viterbi(vit, decoded, num_decoded_bits); + return 0; +} diff --git a/falcon9_decoder/src/libcorrect/fec_shim.h b/falcon9_decoder/src/libcorrect/fec_shim.h new file mode 100644 index 00000000..b06e463c --- /dev/null +++ b/falcon9_decoder/src/libcorrect/fec_shim.h @@ -0,0 +1,74 @@ +#ifndef CORRECT_FEC_H +#define CORRECT_FEC_H +// libcorrect's libfec shim header +// this is a partial implementation of libfec +// header signatures derived from found usages of libfec -- some things may be different +#include + +// Reed-Solomon +void *init_rs_char(int symbol_size, int primitive_polynomial, int first_consecutive_root, + int root_gap, int number_roots, unsigned int pad); +void free_rs_char(void *rs); +void encode_rs_char(void *rs, const unsigned char *msg, unsigned char *parity); +void decode_rs_char(void *rs, unsigned char *block, int *erasure_locations, int num_erasures); + +// Convolutional Codes + +// Polynomials +// These have been determined via find_conv_libfec_poly.c +// We could just make up new ones, but we use libfec's here so that +// codes encoded by this library can be decoded by the original libfec +// and vice-versa +#define V27POLYA 0155 +#define V27POLYB 0117 + +#define V29POLYA 0657 +#define V29POLYB 0435 + +#define V39POLYA 0755 +#define V39POLYB 0633 +#define V39POLYC 0447 + +#define V615POLYA 042631 +#define V615POLYB 047245 +#define V615POLYC 056507 +#define V615POLYD 073363 +#define V615POLYE 077267 +#define V615POLYF 064537 + +// Convolutional Methods +void *create_viterbi27(int num_decoded_bits); +int init_viterbi27(void *vit, int _mystery); +int update_viterbi27_blk(void *vit, unsigned char *encoded_soft, int n_encoded_groups); +int chainback_viterbi27(void *vit, unsigned char *decoded, unsigned int n_decoded_bits, unsigned int _mystery); +void delete_viterbi27(void *vit); + +void *create_viterbi29(int num_decoded_bits); +int init_viterbi29(void *vit, int _mystery); +int update_viterbi29_blk(void *vit, unsigned char *encoded_soft, int n_encoded_groups); +int chainback_viterbi29(void *vit, unsigned char *decoded, unsigned int n_decoded_bits, unsigned int _mystery); +void delete_viterbi29(void *vit); + +void *create_viterbi39(int num_decoded_bits); +int init_viterbi39(void *vit, int _mystery); +int update_viterbi39_blk(void *vit, unsigned char *encoded_soft, int n_encoded_groups); +int chainback_viterbi39(void *vit, unsigned char *decoded, unsigned int n_decoded_bits, unsigned int _mystery); +void delete_viterbi39(void *vit); + +void *create_viterbi615(int num_decoded_bits); +int init_viterbi615(void *vit, int _mystery); +int update_viterbi615_blk(void *vit, unsigned char *encoded_soft, int n_encoded_groups); +int chainback_viterbi615(void *vit, unsigned char *decoded, unsigned int n_decoded_bits, unsigned int _mystery); +void delete_viterbi615(void *vit); + +// Misc other +static inline int parity(unsigned int x) { + /* http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel */ + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + x &= 0xf; + return (0x6996 >> x) & 1; +} + +#endif diff --git a/falcon9_decoder/src/libcorrect/reed-solomon/CMakeLists.txt b/falcon9_decoder/src/libcorrect/reed-solomon/CMakeLists.txt new file mode 100644 index 00000000..eabe75e5 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/reed-solomon/CMakeLists.txt @@ -0,0 +1,2 @@ +set(SRCFILES polynomial.c reed-solomon.c encode.c decode.c) +add_library(correct-reed-solomon OBJECT ${SRCFILES}) diff --git a/falcon9_decoder/src/libcorrect/reed-solomon/decode.c b/falcon9_decoder/src/libcorrect/reed-solomon/decode.c new file mode 100644 index 00000000..ea30b522 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/reed-solomon/decode.c @@ -0,0 +1,508 @@ +#include "correct/reed-solomon/encode.h" + +// calculate all syndromes of the received polynomial at the roots of the generator +// because we're evaluating at the roots of the generator, and because the transmitted +// polynomial was made to be a product of the generator, we know that the transmitted +// polynomial is 0 at these roots +// any nonzero syndromes we find here are the values of the error polynomial evaluated +// at these roots, so these values give us a window into the error polynomial. if +// these syndromes are all zero, then we can conclude the error polynomial is also +// zero. if they're nonzero, then we know our message received an error in transit. +// returns true if syndromes are all zero +static bool reed_solomon_find_syndromes(field_t field, polynomial_t msgpoly, field_logarithm_t **generator_root_exp, + field_element_t *syndromes, size_t min_distance) { + bool all_zero = true; + memset(syndromes, 0, min_distance * sizeof(field_element_t)); + for (unsigned int i = 0; i < min_distance; i++) { + // profiling reveals that this function takes about 50% of the cpu time of + // decoding. so, in order to speed it up a little, we precompute and save + // the successive powers of the roots of the generator, which are + // located in generator_root_exp + field_element_t eval = polynomial_eval_lut(field, msgpoly, generator_root_exp[i]); + if (eval) { + all_zero = false; + } + syndromes[i] = eval; + } + return all_zero; +} + +// Berlekamp-Massey algorithm to find LFSR that describes syndromes +// returns number of errors and writes the error locator polynomial to rs->error_locator +static unsigned int reed_solomon_find_error_locator(correct_reed_solomon *rs, size_t num_erasures) { + unsigned int numerrors = 0; + + memset(rs->error_locator.coeff, 0, (rs->min_distance + 1) * sizeof(field_element_t)); + + // initialize to f(x) = 1 + rs->error_locator.coeff[0] = 1; + rs->error_locator.order = 0; + + memcpy(rs->last_error_locator.coeff, rs->error_locator.coeff, (rs->min_distance + 1) * sizeof(field_element_t)); + rs->last_error_locator.order = rs->error_locator.order; + + field_element_t discrepancy; + field_element_t last_discrepancy = 1; + unsigned int delay_length = 1; + + for (unsigned int i = rs->error_locator.order; i < rs->min_distance - num_erasures; i++) { + discrepancy = rs->syndromes[i]; + for (unsigned int j = 1; j <= numerrors; j++) { + discrepancy = field_add(rs->field, discrepancy, + field_mul(rs->field, rs->error_locator.coeff[j], rs->syndromes[i - j])); + } + + if (!discrepancy) { + // our existing LFSR describes the new syndrome as well + // leave it as-is but update the number of delay elements + // so that if a discrepancy occurs later we can eliminate it + delay_length++; + continue; + } + + if (2 * numerrors <= i) { + // there's a discrepancy, but we still have room for more taps + // lengthen LFSR by one tap and set weight to eliminate discrepancy + + // shift the last locator by the delay length, multiply by discrepancy, + // and divide by the last discrepancy + // we move down because we're shifting up, and this prevents overwriting + for (int j = rs->last_error_locator.order; j >= 0; j--) { + // the bounds here will be ok since we have a headroom of numerrors + rs->last_error_locator.coeff[j + delay_length] = field_div( + rs->field, field_mul(rs->field, rs->last_error_locator.coeff[j], discrepancy), last_discrepancy); + } + for (int j = delay_length - 1; j >= 0; j--) { + rs->last_error_locator.coeff[j] = 0; + } + + // locator = locator - last_locator + // we will also update last_locator to be locator before this loop takes place + field_element_t temp; + for (int j = 0; j <= (rs->last_error_locator.order + delay_length); j++) { + temp = rs->error_locator.coeff[j]; + rs->error_locator.coeff[j] = + field_add(rs->field, rs->error_locator.coeff[j], rs->last_error_locator.coeff[j]); + rs->last_error_locator.coeff[j] = temp; + } + unsigned int temp_order = rs->error_locator.order; + rs->error_locator.order = rs->last_error_locator.order + delay_length; + rs->last_error_locator.order = temp_order; + + // now last_locator is locator before we started, + // and locator is (locator - (discrepancy/last_discrepancy) * x^(delay_length) * last_locator) + + numerrors = i + 1 - numerrors; + last_discrepancy = discrepancy; + delay_length = 1; + continue; + } + + // no more taps + // unlike the previous case, we are preserving last locator, + // but we'll update locator as before + // we're basically flattening the two loops from the previous case because + // we no longer need to update last_locator + for (int j = rs->last_error_locator.order; j >= 0; j--) { + rs->error_locator.coeff[j + delay_length] = + field_add(rs->field, rs->error_locator.coeff[j + delay_length], + field_div(rs->field, field_mul(rs->field, rs->last_error_locator.coeff[j], discrepancy), + last_discrepancy)); + } + rs->error_locator.order = (rs->last_error_locator.order + delay_length > rs->error_locator.order) + ? rs->last_error_locator.order + delay_length + : rs->error_locator.order; + delay_length++; + } + return rs->error_locator.order; +} + +// find the roots of the error locator polynomial +// Chien search +bool reed_solomon_factorize_error_locator(field_t field, unsigned int num_skip, polynomial_t locator_log, field_element_t *roots, + field_logarithm_t **element_exp) { + // normally it'd be tricky to find all the roots + // but, the finite field is awfully finite... + // just brute force search across every field element + unsigned int root = num_skip; + memset(roots + num_skip, 0, (locator_log.order) * sizeof(field_element_t)); + for (field_operation_t i = 0; i < 256; i++) { + // we make two optimizations here to help this search go faster + // a) we have precomputed the first successive powers of every single element + // in the field. we need at most n powers, where n is the largest possible + // degree of the error locator + // b) we have precomputed the error locator polynomial in log form, which + // helps reduce some lookups that would be done here + if (!polynomial_eval_log_lut(field, locator_log, element_exp[i])) { + roots[root] = (field_element_t)i; + root++; + } + } + // this is where we find out if we are have too many errors to recover from + // berlekamp-massey may have built an error locator that has 0 discrepancy + // on the syndromes but doesn't have enough roots + return root == locator_log.order + num_skip; +} + +// use error locator and syndromes to find the error evaluator polynomial +void reed_solomon_find_error_evaluator(field_t field, polynomial_t locator, polynomial_t syndromes, + polynomial_t error_evaluator) { + // the error evaluator, omega(x), is S(x)*Lamba(x) mod x^(2t) + // where S(x) is a polynomial constructed from the syndromes + // S(1) + S(2)*x + ... + S(2t)*x(2t - 1) + // and Lambda(x) is the error locator + // the modulo is implicit here -- we have limited the max length of error_evaluator, + // which polynomial_mul will interpret to mean that it should not compute + // powers larger than that, which is the same as performing mod x^(2t) + polynomial_mul(field, locator, syndromes, error_evaluator); +} + +// use error locator, error roots and syndromes to find the error values +// that is, the elements in the finite field which can be added to the received +// polynomial at the locations of the error roots in order to produce the +// transmitted polynomial +// forney algorithm +void reed_solomon_find_error_values(correct_reed_solomon *rs) { + // error value e(j) = -(X(j)^(1-c) * omega(X(j)^-1))/(lambda'(X(j)^-1)) + // where X(j)^-1 is a root of the error locator, omega(X) is the error evaluator, + // lambda'(X) is the first formal derivative of the error locator, + // and c is the first consecutive root of the generator used in encoding + + // first find omega(X), the error evaluator + // we generate S(x), the polynomial constructed from the roots of the syndromes + // this is *not* the polynomial constructed by expanding the products of roots + // S(x) = S(1) + S(2)*x + ... + S(2t)*x(2t - 1) + polynomial_t syndrome_poly; + syndrome_poly.order = rs->min_distance - 1; + syndrome_poly.coeff = rs->syndromes; + memset(rs->error_evaluator.coeff, 0, (rs->error_evaluator.order + 1) * sizeof(field_element_t)); + reed_solomon_find_error_evaluator(rs->field, rs->error_locator, syndrome_poly, rs->error_evaluator); + + // now find lambda'(X) + rs->error_locator_derivative.order = rs->error_locator.order - 1; + polynomial_formal_derivative(rs->field, rs->error_locator, rs->error_locator_derivative); + + // calculate each e(j) + for (unsigned int i = 0; i < rs->error_locator.order; i++) { + if (rs->error_roots[i] == 0) { + continue; + } + rs->error_vals[i] = field_mul( + rs->field, field_pow(rs->field, rs->error_roots[i], rs->first_consecutive_root - 1), + field_div( + rs->field, polynomial_eval_lut(rs->field, rs->error_evaluator, rs->element_exp[rs->error_roots[i]]), + polynomial_eval_lut(rs->field, rs->error_locator_derivative, rs->element_exp[rs->error_roots[i]]))); + } +} + +void reed_solomon_find_error_locations(field_t field, field_logarithm_t generator_root_gap, + field_element_t *error_roots, field_logarithm_t *error_locations, + unsigned int num_errors, unsigned int num_skip) { + for (unsigned int i = 0; i < num_errors; i++) { + // the error roots are the reciprocals of the error locations, so div 1 by them + + // we do mod 255 here because the log table aliases at index 1 + // the log of 1 is both 0 and 255 (alpha^255 = alpha^0 = 1) + // for most uses it makes sense to have log(1) = 255, but in this case + // we're interested in a byte index, and the 255th index is not even valid + // just wrap it back to 0 + + if (error_roots[i] == 0) { + continue; + } + + field_operation_t loc = field_div(field, 1, error_roots[i]); + for (field_operation_t j = 0; j < 256; j++) { + if (field_pow(field, j, generator_root_gap) == loc) { + error_locations[i] = field.log[j]; + break; + } + } + } +} + +// erasure method -- take given locations and convert to roots +// this is the inverse of reed_solomon_find_error_locations +static void reed_solomon_find_error_roots_from_locations(field_t field, field_logarithm_t generator_root_gap, + const field_logarithm_t *error_locations, + field_element_t *error_roots, unsigned int num_errors) { + for (unsigned int i = 0; i < num_errors; i++) { + field_element_t loc = field_pow(field, field.exp[error_locations[i]], generator_root_gap); + // field_element_t loc = field.exp[error_locations[i]]; + error_roots[i] = field_div(field, 1, loc); + // error_roots[i] = loc; + } +} + +// erasure method -- given the roots of the error locator, create the polynomial +static polynomial_t reed_solomon_find_error_locator_from_roots(field_t field, unsigned int num_errors, + field_element_t *error_roots, + polynomial_t error_locator, + polynomial_t *scratch) { + // multiply out roots to build the error locator polynomial + return polynomial_init_from_roots(field, num_errors, error_roots, error_locator, scratch); +} + +// erasure method +static void reed_solomon_find_modified_syndromes(correct_reed_solomon *rs, field_element_t *syndromes, polynomial_t error_locator, field_element_t *modified_syndromes) { + polynomial_t syndrome_poly; + syndrome_poly.order = rs->min_distance - 1; + syndrome_poly.coeff = syndromes; + + polynomial_t modified_syndrome_poly; + modified_syndrome_poly.order = rs->min_distance - 1; + modified_syndrome_poly.coeff = modified_syndromes; + + polynomial_mul(rs->field, error_locator, syndrome_poly, modified_syndrome_poly); +} + +void correct_reed_solomon_decoder_create(correct_reed_solomon *rs) { + rs->has_init_decode = true; + rs->syndromes = calloc(rs->min_distance, sizeof(field_element_t)); + rs->modified_syndromes = calloc(2 * rs->min_distance, sizeof(field_element_t)); + rs->received_polynomial = polynomial_create(rs->block_length - 1); + rs->error_locator = polynomial_create(rs->min_distance); + rs->error_locator_log = polynomial_create(rs->min_distance); + rs->erasure_locator = polynomial_create(rs->min_distance); + rs->error_roots = calloc(2 * rs->min_distance, sizeof(field_element_t)); + rs->error_vals = malloc(rs->min_distance * sizeof(field_element_t)); + rs->error_locations = malloc(rs->min_distance * sizeof(field_logarithm_t)); + + rs->last_error_locator = polynomial_create(rs->min_distance); + rs->error_evaluator = polynomial_create(rs->min_distance - 1); + rs->error_locator_derivative = polynomial_create(rs->min_distance - 1); + + // calculate and store the first block_length powers of every generator root + // we would have to do this work in order to calculate the syndromes + // if we save it, we can prevent the need to recalculate it on subsequent calls + // total memory usage is min_distance * block_length bytes e.g. 32 * 255 ~= 8k + rs->generator_root_exp = malloc(rs->min_distance * sizeof(field_logarithm_t *)); + for (unsigned int i = 0; i < rs->min_distance; i++) { + rs->generator_root_exp[i] = malloc(rs->block_length * sizeof(field_logarithm_t)); + polynomial_build_exp_lut(rs->field, rs->generator_roots[i], rs->block_length - 1, rs->generator_root_exp[i]); + } + + // calculate and store the first min_distance powers of every element in the field + // we would have to do this for chien search anyway, and its size is only 256 * min_distance bytes + // for min_distance = 32 this is 8k of memory, a pittance for the speedup we receive in exchange + // we also get to reuse this work during error value calculation + rs->element_exp = malloc(256 * sizeof(field_logarithm_t *)); + for (field_operation_t i = 0; i < 256; i++) { + rs->element_exp[i] = malloc(rs->min_distance * sizeof(field_logarithm_t)); + polynomial_build_exp_lut(rs->field, i, rs->min_distance - 1, rs->element_exp[i]); + } + + rs->init_from_roots_scratch[0] = polynomial_create(rs->min_distance); + rs->init_from_roots_scratch[1] = polynomial_create(rs->min_distance); +} + +ssize_t correct_reed_solomon_decode(correct_reed_solomon *rs, const uint8_t *encoded, size_t encoded_length, + uint8_t *msg) { + if (encoded_length > rs->block_length) { + return -1; + } + + // the message is the non-remainder part + size_t msg_length = encoded_length - rs->min_distance; + // if they handed us a nonfull block, we'll write in 0s + size_t pad_length = rs->block_length - encoded_length; + + if (!rs->has_init_decode) { + // initialize rs for decoding + correct_reed_solomon_decoder_create(rs); + } + + // we need to copy to our local buffer + // the buffer we're given has the coordinates in the wrong direction + // e.g. byte 0 corresponds to the 254th order coefficient + // so we're going to flip and then write padding + // the final copied buffer will look like + // | rem (rs->min_distance) | msg (msg_length) | pad (pad_length) | + + for (unsigned int i = 0; i < encoded_length; i++) { + rs->received_polynomial.coeff[i] = encoded[encoded_length - (i + 1)]; + } + + // fill the pad_length with 0s + for (unsigned int i = 0; i < pad_length; i++) { + rs->received_polynomial.coeff[i + encoded_length] = 0; + } + + + bool all_zero = reed_solomon_find_syndromes(rs->field, rs->received_polynomial, rs->generator_root_exp, + rs->syndromes, rs->min_distance); + + if (all_zero) { + // syndromes were all zero, so there was no error in the message + // copy to msg and we are done + for (unsigned int i = 0; i < msg_length; i++) { + msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)]; + } + return msg_length; + } + + unsigned int order = reed_solomon_find_error_locator(rs, 0); + // XXX fix this vvvv + rs->error_locator.order = order; + + for (unsigned int i = 0; i <= rs->error_locator.order; i++) { + // this is a little strange since the coeffs are logs, not elements + // also, we'll be storing log(0) = 0 for any 0 coeffs in the error locator + // that would seem bad but we'll just be using this in chien search, and we'll skip all 0 coeffs + // (you might point out that log(1) also = 0, which would seem to alias. however, that's ok, + // because log(1) = 255 as well, and in fact that's how it's represented in our log table) + rs->error_locator_log.coeff[i] = rs->field.log[rs->error_locator.coeff[i]]; + } + rs->error_locator_log.order = rs->error_locator.order; + + if (!reed_solomon_factorize_error_locator(rs->field, 0, rs->error_locator_log, rs->error_roots, rs->element_exp)) { + // roots couldn't be found, so there were too many errors to deal with + // RS has failed for this message + return -1; + } + + reed_solomon_find_error_locations(rs->field, rs->generator_root_gap, rs->error_roots, rs->error_locations, + rs->error_locator.order, 0); + + reed_solomon_find_error_values(rs); + + for (unsigned int i = 0; i < rs->error_locator.order; i++) { + rs->received_polynomial.coeff[rs->error_locations[i]] = + field_sub(rs->field, rs->received_polynomial.coeff[rs->error_locations[i]], rs->error_vals[i]); + } + + for (unsigned int i = 0; i < msg_length; i++) { + msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)]; + } + + return msg_length; +} + +ssize_t correct_reed_solomon_decode_with_erasures(correct_reed_solomon *rs, const uint8_t *encoded, + size_t encoded_length, const uint8_t *erasure_locations, + size_t erasure_length, uint8_t *msg) { + if (!erasure_length) { + return correct_reed_solomon_decode(rs, encoded, encoded_length, msg); + } + + if (encoded_length > rs->block_length) { + return -1; + } + + if (erasure_length > rs->min_distance) { + return -1; + } + + // the message is the non-remainder part + size_t msg_length = encoded_length - rs->min_distance; + // if they handed us a nonfull block, we'll write in 0s + size_t pad_length = rs->block_length - encoded_length; + + if (!rs->has_init_decode) { + // initialize rs for decoding + correct_reed_solomon_decoder_create(rs); + } + + // we need to copy to our local buffer + // the buffer we're given has the coordinates in the wrong direction + // e.g. byte 0 corresponds to the 254th order coefficient + // so we're going to flip and then write padding + // the final copied buffer will look like + // | rem (rs->min_distance) | msg (msg_length) | pad (pad_length) | + + for (unsigned int i = 0; i < encoded_length; i++) { + rs->received_polynomial.coeff[i] = encoded[encoded_length - (i + 1)]; + } + + // fill the pad_length with 0s + for (unsigned int i = 0; i < pad_length; i++) { + rs->received_polynomial.coeff[i + encoded_length] = 0; + } + + for (unsigned int i = 0; i < erasure_length; i++) { + // remap the coordinates of the erasures + rs->error_locations[i] = rs->block_length - (erasure_locations[i] + pad_length + 1); + } + + reed_solomon_find_error_roots_from_locations(rs->field, rs->generator_root_gap, rs->error_locations, + rs->error_roots, erasure_length); + + rs->erasure_locator = + reed_solomon_find_error_locator_from_roots(rs->field, erasure_length, rs->error_roots, rs->erasure_locator, rs->init_from_roots_scratch); + + bool all_zero = reed_solomon_find_syndromes(rs->field, rs->received_polynomial, rs->generator_root_exp, + rs->syndromes, rs->min_distance); + + if (all_zero) { + // syndromes were all zero, so there was no error in the message + // copy to msg and we are done + for (unsigned int i = 0; i < msg_length; i++) { + msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)]; + } + return msg_length; + } + + reed_solomon_find_modified_syndromes(rs, rs->syndromes, rs->erasure_locator, rs->modified_syndromes); + + field_element_t *syndrome_copy = malloc(rs->min_distance * sizeof(field_element_t)); + memcpy(syndrome_copy, rs->syndromes, rs->min_distance * sizeof(field_element_t)); + + for (unsigned int i = erasure_length; i < rs->min_distance; i++) { + rs->syndromes[i - erasure_length] = rs->modified_syndromes[i]; + } + + unsigned int order = reed_solomon_find_error_locator(rs, erasure_length); + // XXX fix this vvvv + rs->error_locator.order = order; + + for (unsigned int i = 0; i <= rs->error_locator.order; i++) { + // this is a little strange since the coeffs are logs, not elements + // also, we'll be storing log(0) = 0 for any 0 coeffs in the error locator + // that would seem bad but we'll just be using this in chien search, and we'll skip all 0 coeffs + // (you might point out that log(1) also = 0, which would seem to alias. however, that's ok, + // because log(1) = 255 as well, and in fact that's how it's represented in our log table) + rs->error_locator_log.coeff[i] = rs->field.log[rs->error_locator.coeff[i]]; + } + rs->error_locator_log.order = rs->error_locator.order; + + /* + for (unsigned int i = 0; i < erasure_length; i++) { + rs->error_roots[i] = field_div(rs->field, 1, rs->error_roots[i]); + } + */ + + if (!reed_solomon_factorize_error_locator(rs->field, erasure_length, rs->error_locator_log, rs->error_roots, rs->element_exp)) { + // roots couldn't be found, so there were too many errors to deal with + // RS has failed for this message + free(syndrome_copy); + return -1; + } + + polynomial_t temp_poly = polynomial_create(rs->error_locator.order + erasure_length); + polynomial_mul(rs->field, rs->erasure_locator, rs->error_locator, temp_poly); + polynomial_t placeholder_poly = rs->error_locator; + rs->error_locator = temp_poly; + + reed_solomon_find_error_locations(rs->field, rs->generator_root_gap, rs->error_roots, rs->error_locations, + rs->error_locator.order, erasure_length); + + memcpy(rs->syndromes, syndrome_copy, rs->min_distance * sizeof(field_element_t)); + + reed_solomon_find_error_values(rs); + + for (unsigned int i = 0; i < rs->error_locator.order; i++) { + rs->received_polynomial.coeff[rs->error_locations[i]] = + field_sub(rs->field, rs->received_polynomial.coeff[rs->error_locations[i]], rs->error_vals[i]); + } + + rs->error_locator = placeholder_poly; + + for (unsigned int i = 0; i < msg_length; i++) { + msg[i] = rs->received_polynomial.coeff[encoded_length - (i + 1)]; + } + + polynomial_destroy(temp_poly); + free(syndrome_copy); + + return msg_length; +} diff --git a/falcon9_decoder/src/libcorrect/reed-solomon/encode.c b/falcon9_decoder/src/libcorrect/reed-solomon/encode.c new file mode 100644 index 00000000..d4eb6f3a --- /dev/null +++ b/falcon9_decoder/src/libcorrect/reed-solomon/encode.c @@ -0,0 +1,34 @@ +#include "correct/reed-solomon/encode.h" + +ssize_t correct_reed_solomon_encode(correct_reed_solomon *rs, const uint8_t *msg, size_t msg_length, uint8_t *encoded) { + if (msg_length > rs->message_length) { + return -1; + } + + size_t pad_length = rs->message_length - msg_length; + for (unsigned int i = 0; i < msg_length; i++) { + // message goes from high order to low order but libcorrect polynomials go low to high + // so we reverse on the way in and on the way out + // we'd have to do a copy anyway so this reversal should be free + rs->encoded_polynomial.coeff[rs->encoded_polynomial.order - (i + pad_length)] = msg[i]; + } + + // 0-fill the rest of the coefficients -- this length will always be > 0 + // because the order of this poly is block_length and the msg_length <= message_length + // e.g. 255 and 223 + memset(rs->encoded_polynomial.coeff + (rs->encoded_polynomial.order + 1 - pad_length), 0, pad_length); + memset(rs->encoded_polynomial.coeff, 0, (rs->encoded_polynomial.order + 1 - rs->message_length)); + + polynomial_mod(rs->field, rs->encoded_polynomial, rs->generator, rs->encoded_remainder); + + // now return byte order to highest order to lowest order + for (unsigned int i = 0; i < msg_length; i++) { + encoded[i] = rs->encoded_polynomial.coeff[rs->encoded_polynomial.order - (i + pad_length)]; + } + + for (unsigned int i = 0; i < rs->min_distance; i++) { + encoded[msg_length + i] = rs->encoded_remainder.coeff[rs->min_distance - (i + 1)]; + } + + return rs->block_length; +} diff --git a/falcon9_decoder/src/libcorrect/reed-solomon/polynomial.c b/falcon9_decoder/src/libcorrect/reed-solomon/polynomial.c new file mode 100644 index 00000000..32c2792a --- /dev/null +++ b/falcon9_decoder/src/libcorrect/reed-solomon/polynomial.c @@ -0,0 +1,255 @@ +#include "correct/reed-solomon/polynomial.h" + +polynomial_t polynomial_create(unsigned int order) { + polynomial_t polynomial; + polynomial.coeff = malloc(sizeof(field_element_t) * (order + 1)); + polynomial.order = order; + return polynomial; +} + +void polynomial_destroy(polynomial_t polynomial) { + free(polynomial.coeff); +} + +// if you want a full multiplication, then make res.order = l.order + r.order +// but if you just care about a lower order, e.g. mul mod x^i, then you can select +// fewer coefficients +void polynomial_mul(field_t field, polynomial_t l, polynomial_t r, polynomial_t res) { + // perform an element-wise multiplication of two polynomials + memset(res.coeff, 0, sizeof(field_element_t) * (res.order + 1)); + for (unsigned int i = 0; i <= l.order; i++) { + if (i > res.order) { + continue; + } + unsigned int j_limit = (r.order > res.order - i) ? res.order - i : r.order; + for (unsigned int j = 0; j <= j_limit; j++) { + // e.g. alpha^5*x * alpha^37*x^2 --> alpha^42*x^3 + res.coeff[i + j] = field_add(field, res.coeff[i + j], field_mul(field, l.coeff[i], r.coeff[j])); + } + } +} + +void polynomial_mod(field_t field, polynomial_t dividend, polynomial_t divisor, polynomial_t mod) { + // find the polynomial remainder of dividend mod divisor + // do long division and return just the remainder (written to mod) + + if (mod.order < dividend.order) { + // mod.order must be >= dividend.order (scratch space needed) + // this is an error -- catch it in debug? + return; + } + // initialize remainder as dividend + memcpy(mod.coeff, dividend.coeff, sizeof(field_element_t) * (dividend.order + 1)); + + // XXX make sure divisor[divisor_order] is nonzero + field_logarithm_t divisor_leading = field.log[divisor.coeff[divisor.order]]; + // long division steps along one order at a time, starting at the highest order + for (unsigned int i = dividend.order; i > 0; i--) { + // look at the leading coefficient of dividend and divisor + // if leading coefficient of dividend / leading coefficient of divisor is q + // then the next row of subtraction will be q * divisor + // if order of q < 0 then what we have is the remainder and we are done + if (i < divisor.order) { + break; + } + if (mod.coeff[i] == 0) { + continue; + } + unsigned int q_order = i - divisor.order; + field_logarithm_t q_coeff = field_div_log(field, field.log[mod.coeff[i]], divisor_leading); + + // now that we've chosen q, multiply the divisor by q and subtract from + // our remainder. subtracting in GF(2^8) is XOR, just like addition + for (unsigned int j = 0; j <= divisor.order; j++) { + if (divisor.coeff[j] == 0) { + continue; + } + // all of the multiplication is shifted up by q_order places + mod.coeff[j + q_order] = field_add(field, mod.coeff[j + q_order], + field_mul_log_element(field, field.log[divisor.coeff[j]], q_coeff)); + } + } +} + +void polynomial_formal_derivative(field_t field, polynomial_t poly, polynomial_t der) { + // if f(x) = a(n)*x^n + ... + a(1)*x + a(0) + // then f'(x) = n*a(n)*x^(n-1) + ... + 2*a(2)*x + a(1) + // where n*a(n) = sum(k=1, n, a(n)) e.g. the nth sum of a(n) in GF(2^8) + + // assumes der.order = poly.order - 1 + memset(der.coeff, 0, sizeof(field_element_t) * (der.order + 1)); + for (unsigned int i = 0; i <= der.order; i++) { + // we're filling in the ith power of der, so we look ahead one power in poly + // f(x) = a(i + 1)*x^(i + 1) -> f'(x) = (i + 1)*a(i + 1)*x^i + // where (i + 1)*a(i + 1) is the sum of a(i + 1) (i + 1) times, not the product + der.coeff[i] = field_sum(field, poly.coeff[i + 1], i + 1); + } +} + +field_element_t polynomial_eval(field_t field, polynomial_t poly, field_element_t val) { + // evaluate the polynomial poly at a particular element val + if (val == 0) { + return poly.coeff[0]; + } + + field_element_t res = 0; + + // we're going to start at 0th order and multiply by val each time + field_logarithm_t val_exponentiated = field.log[1]; + field_logarithm_t val_log = field.log[val]; + + for (unsigned int i = 0; i <= poly.order; i++) { + if (poly.coeff[i] != 0) { + // multiply-accumulate by the next coeff times the next power of val + res = field_add(field, res, + field_mul_log_element(field, field.log[poly.coeff[i]], val_exponentiated)); + } + // now advance to the next power + val_exponentiated = field_mul_log(field, val_exponentiated, val_log); + } + return res; +} + +field_element_t polynomial_eval_lut(field_t field, polynomial_t poly, const field_logarithm_t *val_exp) { + // evaluate the polynomial poly at a particular element val + // in this case, all of the logarithms of the successive powers of val have been precalculated + // this removes the extra work we'd have to do to calculate val_exponentiated each time + // if this function is to be called on the same val multiple times + if (val_exp[0] == 0) { + return poly.coeff[0]; + } + + field_element_t res = 0; + + for (unsigned int i = 0; i <= poly.order; i++) { + if (poly.coeff[i] != 0) { + // multiply-accumulate by the next coeff times the next power of val + res = field_add(field, res, + field_mul_log_element(field, field.log[poly.coeff[i]], val_exp[i])); + } + } + return res; +} + +field_element_t polynomial_eval_log_lut(field_t field, polynomial_t poly_log, const field_logarithm_t *val_exp) { + // evaluate the log_polynomial poly at a particular element val + // like polynomial_eval_lut, the logarithms of the successive powers of val have been + // precomputed + if (val_exp[0] == 0) { + if (poly_log.coeff[0] == 0) { + // special case for the non-existant log case + return 0; + } + return field.exp[poly_log.coeff[0]]; + } + + field_element_t res = 0; + + for (unsigned int i = 0; i <= poly_log.order; i++) { + // using 0 as a sentinel value in log -- log(0) is really -inf + if (poly_log.coeff[i] != 0) { + // multiply-accumulate by the next coeff times the next power of val + res = field_add(field, res, + field_mul_log_element(field, poly_log.coeff[i], val_exp[i])); + } + } + return res; +} + +void polynomial_build_exp_lut(field_t field, field_element_t val, unsigned int order, field_logarithm_t *val_exp) { + // create the lookup table of successive powers of val used by polynomial_eval_lut + field_logarithm_t val_exponentiated = field.log[1]; + field_logarithm_t val_log = field.log[val]; + for (unsigned int i = 0; i <= order; i++) { + if (val == 0) { + val_exp[i] = 0; + } else { + val_exp[i] = val_exponentiated; + val_exponentiated = field_mul_log(field, val_exponentiated, val_log); + } + } +} + +polynomial_t polynomial_init_from_roots(field_t field, unsigned int nroots, field_element_t *roots, polynomial_t poly, polynomial_t *scratch) { + unsigned int order = nroots; + polynomial_t l; + field_element_t l_coeff[2]; + l.order = 1; + l.coeff = l_coeff; + + // we'll keep two temporary stores of rightside polynomial + // each time through the loop, we take the previous result and use it as new rightside + // swap back and forth (prevents the need for a copy) + polynomial_t r[2]; + r[0] = scratch[0]; + r[1] = scratch[1]; + unsigned int rcoeffres = 0; + + // initialize the result with x + roots[0] + r[rcoeffres].coeff[1] = 1; + r[rcoeffres].coeff[0] = roots[0]; + r[rcoeffres].order = 1; + + // initialize lcoeff[1] with x + // we'll fill in the 0th order term in each loop iter + l.coeff[1] = 1; + + // loop through, using previous run's result as the new right hand side + // this allows us to multiply one group at a time + for (unsigned int i = 1; i < nroots; i++) { + l.coeff[0] = roots[i]; + unsigned int nextrcoeff = rcoeffres; + rcoeffres = (rcoeffres + 1) % 2; + r[rcoeffres].order = i + 1; + polynomial_mul(field, l, r[nextrcoeff], r[rcoeffres]); + } + + memcpy(poly.coeff, r[rcoeffres].coeff, (order + 1) * sizeof(field_element_t)); + poly.order = order; + + return poly; +} + +polynomial_t polynomial_create_from_roots(field_t field, unsigned int nroots, field_element_t *roots) { + polynomial_t poly = polynomial_create(nroots); + unsigned int order = nroots; + polynomial_t l; + l.order = 1; + l.coeff = calloc(2, sizeof(field_element_t)); + + polynomial_t r[2]; + // we'll keep two temporary stores of rightside polynomial + // each time through the loop, we take the previous result and use it as new rightside + // swap back and forth (prevents the need for a copy) + r[0].coeff = calloc(order + 1, sizeof(field_element_t)); + r[1].coeff = calloc(order + 1, sizeof(field_element_t)); + unsigned int rcoeffres = 0; + + // initialize the result with x + roots[0] + r[rcoeffres].coeff[0] = roots[0]; + r[rcoeffres].coeff[1] = 1; + r[rcoeffres].order = 1; + + // initialize lcoeff[1] with x + // we'll fill in the 0th order term in each loop iter + l.coeff[1] = 1; + + // loop through, using previous run's result as the new right hand side + // this allows us to multiply one group at a time + for (unsigned int i = 1; i < nroots; i++) { + l.coeff[0] = roots[i]; + unsigned int nextrcoeff = rcoeffres; + rcoeffres = (rcoeffres + 1) % 2; + r[rcoeffres].order = i + 1; + polynomial_mul(field, l, r[nextrcoeff], r[rcoeffres]); + } + + memcpy(poly.coeff, r[rcoeffres].coeff, (order + 1) * sizeof(field_element_t)); + poly.order = order; + + free(l.coeff); + free(r[0].coeff); + free(r[1].coeff); + + return poly; +} diff --git a/falcon9_decoder/src/libcorrect/reed-solomon/reed-solomon.c b/falcon9_decoder/src/libcorrect/reed-solomon/reed-solomon.c new file mode 100644 index 00000000..91a708e2 --- /dev/null +++ b/falcon9_decoder/src/libcorrect/reed-solomon/reed-solomon.c @@ -0,0 +1,187 @@ +#include "correct/reed-solomon/reed-solomon.h" + +// coeff must be of size nroots + 1 +// e.g. 2 roots (x + alpha)(x + alpha^2) yields a poly with 3 terms x^2 + g0*x + g1 +static polynomial_t reed_solomon_build_generator(field_t field, unsigned int nroots, field_element_t first_consecutive_root, unsigned int root_gap, polynomial_t generator, field_element_t *roots) { + // generator has order 2*t + // of form (x + alpha^1)(x + alpha^2)...(x - alpha^2*t) + for (unsigned int i = 0; i < nroots; i++) { + roots[i] = field.exp[(root_gap * (i + first_consecutive_root)) % 255]; + } + return polynomial_create_from_roots(field, nroots, roots); +} + +correct_reed_solomon *correct_reed_solomon_create(field_operation_t primitive_polynomial, field_logarithm_t first_consecutive_root, field_logarithm_t generator_root_gap, size_t num_roots) { + correct_reed_solomon *rs = calloc(1, sizeof(correct_reed_solomon)); + rs->field = field_create(primitive_polynomial); + + rs->block_length = 255; + rs->min_distance = num_roots; + rs->message_length = rs->block_length - rs->min_distance; + + rs->first_consecutive_root = first_consecutive_root; + rs->generator_root_gap = generator_root_gap; + + rs->generator_roots = malloc(rs->min_distance * sizeof(field_element_t)); + + rs->generator = reed_solomon_build_generator(rs->field, rs->min_distance, rs->first_consecutive_root, rs->generator_root_gap, rs->generator, rs->generator_roots); + + rs->encoded_polynomial = polynomial_create(rs->block_length - 1); + rs->encoded_remainder = polynomial_create(rs->block_length - 1); + + rs->has_init_decode = false; + + return rs; +} + +void correct_reed_solomon_destroy(correct_reed_solomon *rs) { + field_destroy(rs->field); + polynomial_destroy(rs->generator); + free(rs->generator_roots); + polynomial_destroy(rs->encoded_polynomial); + polynomial_destroy(rs->encoded_remainder); + if (rs->has_init_decode) { + free(rs->syndromes); + free(rs->modified_syndromes); + polynomial_destroy(rs->received_polynomial); + polynomial_destroy(rs->error_locator); + polynomial_destroy(rs->error_locator_log); + polynomial_destroy(rs->erasure_locator); + free(rs->error_roots); + free(rs->error_vals); + free(rs->error_locations); + polynomial_destroy(rs->last_error_locator); + polynomial_destroy(rs->error_evaluator); + polynomial_destroy(rs->error_locator_derivative); + for (unsigned int i = 0; i < rs->min_distance; i++) { + free(rs->generator_root_exp[i]); + } + free(rs->generator_root_exp); + for (field_operation_t i = 0; i < 256; i++) { + free(rs->element_exp[i]); + } + free(rs->element_exp); + polynomial_destroy(rs->init_from_roots_scratch[0]); + polynomial_destroy(rs->init_from_roots_scratch[1]); + } + free(rs); +} + +void correct_reed_solomon_debug_print(correct_reed_solomon *rs) { + for (unsigned int i = 0; i < 256; i++) { + printf("%3d %3d %3d %3d\n", i, rs->field.exp[i], i, rs->field.log[i]); + } + printf("\n"); + + printf("roots: "); + for (unsigned int i = 0; i < rs->min_distance; i++) { + printf("%d", rs->generator_roots[i]); + if (i < rs->min_distance - 1) { + printf(", "); + } + } + printf("\n\n"); + + printf("generator: "); + for (unsigned int i = 0; i < rs->generator.order + 1; i++) { + printf("%d*x^%d", rs->generator.coeff[i], i); + if (i < rs->generator.order) { + printf(" + "); + } + } + printf("\n\n"); + + printf("generator (alpha format): "); + for (unsigned int i = rs->generator.order + 1; i > 0; i--) { + printf("alpha^%d*x^%d", rs->field.log[rs->generator.coeff[i - 1]], i - 1); + if (i > 1) { + printf(" + "); + } + } + printf("\n\n"); + + printf("remainder: "); + bool has_printed = false; + for (unsigned int i = 0; i < rs->encoded_remainder.order + 1; i++) { + if (!rs->encoded_remainder.coeff[i]) { + continue; + } + if (has_printed) { + printf(" + "); + } + has_printed = true; + printf("%d*x^%d", rs->encoded_remainder.coeff[i], i); + } + printf("\n\n"); + + printf("syndromes: "); + for (unsigned int i = 0; i < rs->min_distance; i++) { + printf("%d", rs->syndromes[i]); + if (i < rs->min_distance - 1) { + printf(", "); + } + } + printf("\n\n"); + + printf("numerrors: %d\n\n", rs->error_locator.order); + + printf("error locator: "); + has_printed = false; + for (unsigned int i = 0; i < rs->error_locator.order + 1; i++) { + if (!rs->error_locator.coeff[i]) { + continue; + } + if (has_printed) { + printf(" + "); + } + has_printed = true; + printf("%d*x^%d", rs->error_locator.coeff[i], i); + } + printf("\n\n"); + + printf("error roots: "); + for (unsigned int i = 0; i < rs->error_locator.order; i++) { + printf("%d@%d", polynomial_eval(rs->field, rs->error_locator, rs->error_roots[i]), rs->error_roots[i]); + if (i < rs->error_locator.order - 1) { + printf(", "); + } + } + printf("\n\n"); + + printf("error evaluator: "); + has_printed = false; + for (unsigned int i = 0; i < rs->error_evaluator.order; i++) { + if (!rs->error_evaluator.coeff[i]) { + continue; + } + if (has_printed) { + printf(" + "); + } + has_printed = true; + printf("%d*x^%d", rs->error_evaluator.coeff[i], i); + } + printf("\n\n"); + + printf("error locator derivative: "); + has_printed = false; + for (unsigned int i = 0; i < rs->error_locator_derivative.order; i++) { + if (!rs->error_locator_derivative.coeff[i]) { + continue; + } + if (has_printed) { + printf(" + "); + } + has_printed = true; + printf("%d*x^%d", rs->error_locator_derivative.coeff[i], i); + } + printf("\n\n"); + + printf("error locator: "); + for (unsigned int i = 0; i < rs->error_locator.order; i++) { + printf("%d@%d", rs->error_vals[i], rs->error_locations[i]); + if (i < rs->error_locator.order - 1) { + printf(", "); + } + } + printf("\n\n"); +} diff --git a/falcon9_decoder/src/main.cpp b/falcon9_decoder/src/main.cpp new file mode 100644 index 00000000..48f883be --- /dev/null +++ b/falcon9_decoder/src/main.cpp @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define CONCAT(a, b) ((std::string(a) + b).c_str()) + +SDRPP_MOD_INFO { + /* Name: */ "falcon9_decoder", + /* Description: */ "Falcon9 telemetry decoder for SDR++", + /* Author: */ "Ryzerth", + /* Version: */ 0, 1, 0, + /* Max instances */ -1 +}; + +#define INPUT_SAMPLE_RATE 6000000 + +class Falcon9DecoderModule : public ModuleManager::Instance { +public: + Falcon9DecoderModule(std::string name) { + this->name = name; + + vfo = sigpath::vfoManager.createVFO(name, ImGui::WaterfallVFO::REF_CENTER, 0, 4000000, INPUT_SAMPLE_RATE, 1); + + // dsp::Splitter split; + // dsp::Reshaper reshape; + // dsp::HandlerSink symSink; + + // dsp::stream thrInput; + // dsp::Threshold thr; + + demod.init(vfo->output, INPUT_SAMPLE_RATE, 2000000.0f); + recov.init(&demod.out, (float)INPUT_SAMPLE_RATE / 3572000.0f, 0.00765625f, 0.175f, 0.005f); + split.init(&recov.out); + split.bindStream(&reshapeInput); + split.bindStream(&thrInput); + reshape.init(&reshapeInput, 1024, 198976); + symSink.init(&reshape.out, symSinkHandler, this); + thr.init(&thrInput); + deframe.init(&thr.out, 10232, syncWord, 32); + falconRS.init(&deframe.out); + pkt.init(&falconRS.out); + sink.init(&pkt.out, sinkHandler, this); + + demod.start(); + recov.start(); + split.start(); + reshape.start(); + symSink.start(); + sink.start(); + pkt.start(); + falconRS.start(); + deframe.start(); + thr.start(); + + ffplay = _popen("ffplay -framedrop -hide_banner -loglevel panic -window_title \"Falcon 9 Cameras\" -", "wb"); + + gui::menu.registerEntry(name, menuHandler, this, this); + } + + ~Falcon9DecoderModule() { + + } + + void enable() { + vfo = sigpath::vfoManager.createVFO(name, ImGui::WaterfallVFO::REF_CENTER, 0, 4000000, INPUT_SAMPLE_RATE, 1); + + demod.setInput(vfo->output); + + demod.start(); + recov.start(); + split.start(); + reshape.start(); + symSink.start(); + sink.start(); + pkt.start(); + falconRS.start(); + deframe.start(); + thr.start(); + + enabled = true; + } + + void disable() { + demod.stop(); + recov.stop(); + split.stop(); + reshape.stop(); + symSink.stop(); + sink.stop(); + pkt.stop(); + falconRS.stop(); + deframe.stop(); + thr.stop(); + + sigpath::vfoManager.deleteVFO(vfo); + enabled = false; + } + + bool isEnabled() { + return enabled; + } + +private: + static void menuHandler(void* ctx) { + Falcon9DecoderModule* _this = (Falcon9DecoderModule*)ctx; + + float menuWidth = ImGui::GetContentRegionAvailWidth(); + + if (!_this->enabled) { style::beginDisabled(); } + + ImGui::SetNextItemWidth(menuWidth); + _this->symDiag.draw(); + + if (_this->logsVisible) { + if (ImGui::Button("Hide logs", ImVec2(menuWidth, 0))) { _this->logsVisible = false; } + } + else { + if (ImGui::Button("Show logs", ImVec2(menuWidth, 0))) { _this->logsVisible = true; } + } + + if (_this->logsVisible) { + std::lock_guard lck(_this->logsMtx); + + ImGui::Begin("Falcon9 Telemetry"); + ImGui::BeginTabBar("Falcon9Tabs"); + + // GPS Logs + ImGui::BeginTabItem("GPS"); + if (ImGui::Button("Clear logs##GPSClear")) { _this->gpsLogs.clear(); } + ImGui::BeginChild(ImGuiID("GPSChild")); + ImGui::TextUnformatted(_this->gpsLogs.c_str()); + ImGui::SetScrollHere(1.0f); + ImGui::EndChild(); + ImGui::EndTabItem(); + + // STMM1A Logs + ImGui::BeginTabItem("STMM1A"); + + ImGui::EndTabItem(); + + // STMM1B Logs + ImGui::BeginTabItem("STMM1B"); + + ImGui::EndTabItem(); + + // STMM1C Logs + ImGui::BeginTabItem("STMM1C"); + + ImGui::EndTabItem(); + + ImGui::EndTabBar(); + ImGui::End(); + } + + if (!_this->enabled) { style::endDisabled(); } + } + + static void sinkHandler(uint8_t* data, int count, void* ctx) { + Falcon9DecoderModule* _this = (Falcon9DecoderModule*)ctx; + + uint16_t length = (((data[0] & 0b1111) << 8) | data[1]) + 2; + uint64_t pktId = ((uint64_t)data[2] << 56) | ((uint64_t)data[3] << 48) | ((uint64_t)data[4] << 40) | ((uint64_t)data[5] << 32) + | ((uint64_t)data[6] << 24) | ((uint64_t)data[7] << 16) | ((uint64_t)data[8] << 8) | data[9]; + + if (pktId == 0x0117FE0800320303 || pktId == 0x0112FA0800320303) { + data[length - 2] = 0; + _this->logsMtx.lock(); + _this->gpsLogs += (char*)(data + 25); + _this->logsMtx.unlock(); + } + else if (pktId == 0x01123201042E1403) { + fwrite(data + 25, 1, 940, _this->ffplay); + } + + //printf("%016" PRIX64 ": %d bytes, %d full\n", pktId, length, count); + } + + static void symSinkHandler(float* data, int count, void* ctx) { + Falcon9DecoderModule* _this = (Falcon9DecoderModule*)ctx; + float* buf = _this->symDiag.aquireBuffer(); + memcpy(buf, data, 1024*sizeof(float)); + _this->symDiag.releaseBuffer(); + } + + std::string name; + bool enabled = true; + + bool logsVisible = false; + + std::mutex logsMtx; + std::string gpsLogs = ""; + + // DSP Chain + dsp::FloatFMDemod demod; + dsp::MMClockRecovery recov; + + dsp::Splitter split; + + dsp::stream reshapeInput; + dsp::Reshaper reshape; + dsp::HandlerSink symSink; + + dsp::stream thrInput; + dsp::Threshold thr; + + uint8_t syncWord[32] = {0,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,0,1}; + dsp::Deframer deframe; + dsp::FalconRS falconRS; + dsp::FalconPacketSync pkt; + dsp::HandlerSink sink; + + FILE* ffplay; + + VFOManager::VFO* vfo; + + ImGui::SymbolDiagram symDiag; + +}; + +MOD_EXPORT void _INIT_() { + // Nothing +} + +MOD_EXPORT ModuleManager::Instance* _CREATE_INSTANCE_(std::string name) { + return new Falcon9DecoderModule(name); +} + +MOD_EXPORT void _DELETE_INSTANCE_(void* instance) { + delete (Falcon9DecoderModule*)instance; +} + +MOD_EXPORT void _END_() { + // Nothing either +} \ No newline at end of file diff --git a/make_windows_package.ps1 b/make_windows_package.ps1 index 4b8f0db2..fc39c9bf 100644 --- a/make_windows_package.ps1 +++ b/make_windows_package.ps1 @@ -32,6 +32,9 @@ cp build/soapy_source/Release/soapy_source.dll sdrpp_windows_x64/modules/ cp build/audio_sink/Release/audio_sink.dll sdrpp_windows_x64/modules/ cp "C:/Program Files (x86)/RtAudio/bin/rtaudio.dll" sdrpp_windows_x64/ +# Copy supporting libs +cp 'C:/Program Files/PothosSDR/bin/libusb-1.0.dll' sdrpp_windows_x64/ + Compress-Archive -Path sdrpp_windows_x64/ -DestinationPath sdrpp_windows_x64.zip rm -Force -Recurse sdrpp_windows_x64 \ No newline at end of file