mirror of
https://github.com/AlexandreRouma/SDRPlusPlus.git
synced 2025-10-21 13:19:22 +02:00
DSP performance upgrades + bugfix
This commit is contained in:
@@ -1,185 +1,117 @@
|
||||
#pragma once
|
||||
#include <dsp/block.h>
|
||||
#include <dsp/interpolation_taps.h>
|
||||
|
||||
#define DSP_SIGN(n) ((n) >= 0)
|
||||
#define DSP_STEP(n) (((n) > 0.0f) ? 1.0f : -1.0f)
|
||||
#include <math.h>
|
||||
#include <dsp/utils/macros.h>
|
||||
|
||||
namespace dsp {
|
||||
class SqSymbolRecovery : public generic_block<SqSymbolRecovery> {
|
||||
template <int ORDER>
|
||||
class CostasLoop: public generic_block<CostasLoop<ORDER>> {
|
||||
public:
|
||||
SqSymbolRecovery() {}
|
||||
CostasLoop() {}
|
||||
CostasLoop(stream<complex_t>* in, float loopBandwidth) { init(in, loopBandwidth); }
|
||||
|
||||
SqSymbolRecovery(stream<float>* in, int omega) { init(in, omega); }
|
||||
void init(stream<complex_t>* in, float loopBandwidth) {
|
||||
_in = in;
|
||||
lastVCO.re = 1.0f;
|
||||
lastVCO.im = 0.0f;
|
||||
_loopBandwidth = loopBandwidth;
|
||||
|
||||
~SqSymbolRecovery() {
|
||||
generic_block<SqSymbolRecovery>::stop();
|
||||
float dampningFactor = sqrtf(2.0f) / 2.0f;
|
||||
float denominator = (1.0 + 2.0 * dampningFactor * _loopBandwidth + _loopBandwidth * _loopBandwidth);
|
||||
_alpha = (4 * dampningFactor * _loopBandwidth) / denominator;
|
||||
_beta = (4 * _loopBandwidth * _loopBandwidth) / denominator;
|
||||
|
||||
generic_block<CostasLoop<ORDER>>::registerInput(_in);
|
||||
generic_block<CostasLoop<ORDER>>::registerOutput(&out);
|
||||
}
|
||||
|
||||
void init(stream<float>* in, int omega) {
|
||||
void setInput(stream<complex_t>* in) {
|
||||
generic_block<CostasLoop<ORDER>>::tempStop();
|
||||
generic_block<CostasLoop<ORDER>>::unregisterInput(_in);
|
||||
_in = in;
|
||||
samplesPerSymbol = omega;
|
||||
generic_block<SqSymbolRecovery>::registerInput(_in);
|
||||
generic_block<SqSymbolRecovery>::registerOutput(&out);
|
||||
generic_block<CostasLoop<ORDER>>::registerInput(_in);
|
||||
generic_block<CostasLoop<ORDER>>::tempStart();
|
||||
}
|
||||
|
||||
void setLoopBandwidth(float loopBandwidth) {
|
||||
generic_block<CostasLoop<ORDER>>::tempStop();
|
||||
_loopBandwidth = loopBandwidth;
|
||||
float dampningFactor = sqrtf(2.0f) / 2.0f;
|
||||
float denominator = (1.0 + 2.0 * dampningFactor * _loopBandwidth + _loopBandwidth * _loopBandwidth);
|
||||
_alpha = (4 * dampningFactor * _loopBandwidth) / denominator;
|
||||
_beta = (4 * _loopBandwidth * _loopBandwidth) / denominator;
|
||||
generic_block<CostasLoop<ORDER>>::tempStart();
|
||||
}
|
||||
|
||||
int run() {
|
||||
count = _in->read();
|
||||
int count = _in->read();
|
||||
if (count < 0) { return -1; }
|
||||
|
||||
int outCount = 0;
|
||||
complex_t outVal;
|
||||
float error;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (DSP_SIGN(lastVal) != DSP_SIGN(_in->readBuf[i])) {
|
||||
counter = samplesPerSymbol / 2;
|
||||
lastVal = _in->readBuf[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (counter >= samplesPerSymbol) {
|
||||
counter = 0;
|
||||
out.writeBuf[outCount] = _in->readBuf[i];
|
||||
outCount++;
|
||||
}
|
||||
else {
|
||||
counter++;
|
||||
}
|
||||
// Mix the VFO with the input to create the output value
|
||||
outVal = lastVCO * _in->readBuf[i];
|
||||
out.writeBuf[i] = outVal;
|
||||
|
||||
// Calculate the phase error estimation
|
||||
if constexpr (ORDER == 2) {
|
||||
error = outVal.re * outVal.im;
|
||||
}
|
||||
if constexpr (ORDER == 4) {
|
||||
error = (DSP_STEP(outVal.re) * outVal.im) - (DSP_STEP(outVal.im) * outVal.re);
|
||||
}
|
||||
if constexpr (ORDER == 8) {
|
||||
// This is taken from GR, I have no idea how it works but it does...
|
||||
const float K = (sqrtf(2.0) - 1);
|
||||
if (fabsf(outVal.re) >= fabsf(outVal.im)) {
|
||||
error = ((outVal.re > 0.0f ? 1.0f : -1.0f) * outVal.im -
|
||||
(outVal.im > 0.0f ? 1.0f : -1.0f) * outVal.re * K);
|
||||
} else {
|
||||
error = ((outVal.re > 0.0f ? 1.0f : -1.0f) * outVal.im * K -
|
||||
(outVal.im > 0.0f ? 1.0f : -1.0f) * outVal.re);
|
||||
}
|
||||
}
|
||||
|
||||
if (error > 1.0f) { error = 1.0f; }
|
||||
if (error < -1.0f) { error = -1.0f; }
|
||||
|
||||
// Integrate frequency and clamp it
|
||||
vcoFrequency += _beta * error;
|
||||
if (vcoFrequency > 1.0f) { vcoFrequency = 1.0f; }
|
||||
if (vcoFrequency < -1.0f) { vcoFrequency = -1.0f; }
|
||||
|
||||
// Calculate new phase and wrap it
|
||||
vcoPhase += vcoFrequency + (_alpha * error);
|
||||
while (vcoPhase > (2.0f * FL_M_PI)) { vcoPhase -= (2.0f * FL_M_PI); }
|
||||
while (vcoPhase < (-2.0f * FL_M_PI)) { vcoPhase += (2.0f * FL_M_PI); }
|
||||
|
||||
// Calculate output
|
||||
lastVCO.re = cosf(vcoPhase);
|
||||
lastVCO.im = sinf(vcoPhase);
|
||||
|
||||
lastVal = _in->readBuf[i];
|
||||
}
|
||||
|
||||
_in->flush();
|
||||
if (!out.swap(outCount)) { return -1; }
|
||||
if (!out.swap(count)) { return -1; }
|
||||
return count;
|
||||
}
|
||||
|
||||
stream<float> out;
|
||||
stream<complex_t> out;
|
||||
|
||||
private:
|
||||
int count;
|
||||
int samplesPerSymbol = 1;
|
||||
int counter = 0;
|
||||
float lastVal = 0;
|
||||
stream<float>* _in;
|
||||
float _loopBandwidth = 1.0f;
|
||||
|
||||
};
|
||||
float _alpha; // Integral coefficient
|
||||
float _beta; // Proportional coefficient
|
||||
float vcoFrequency = 0.0f;
|
||||
float vcoPhase = 0.0f;
|
||||
complex_t lastVCO;
|
||||
|
||||
|
||||
|
||||
class MMClockRecovery : public generic_block<MMClockRecovery> {
|
||||
public:
|
||||
MMClockRecovery() {}
|
||||
|
||||
MMClockRecovery(stream<float>* in, float omega, float gainOmega, float muGain, float omegaRelLimit) {
|
||||
init(in, omega, gainOmega, muGain, omegaRelLimit);
|
||||
}
|
||||
|
||||
~MMClockRecovery() {
|
||||
generic_block<MMClockRecovery>::stop();
|
||||
}
|
||||
|
||||
void init(stream<float>* in, float omega, float gainOmega, float muGain, float omegaRelLimit) {
|
||||
_in = in;
|
||||
_omega = omega;
|
||||
_muGain = muGain;
|
||||
_gainOmega = gainOmega;
|
||||
_omegaRelLimit = omegaRelLimit;
|
||||
|
||||
omegaMin = _omega - (_omega * _omegaRelLimit);
|
||||
omegaMax = _omega + (_omega * _omegaRelLimit);
|
||||
_dynOmega = _omega;
|
||||
|
||||
generic_block<MMClockRecovery>::registerInput(_in);
|
||||
generic_block<MMClockRecovery>::registerOutput(&out);
|
||||
}
|
||||
|
||||
int run() {
|
||||
count = _in->read();
|
||||
if (count < 0) { return -1; }
|
||||
|
||||
int outCount = 0;
|
||||
float outVal;
|
||||
float phaseError;
|
||||
float roundedStep;
|
||||
int maxOut = 2.0f * _omega * (float)count;
|
||||
|
||||
// Copy the first 7 values to the delay buffer for fast computing
|
||||
memcpy(&delay[7], _in->readBuf, 7);
|
||||
|
||||
int i = nextOffset;
|
||||
for (; i < count && outCount < maxOut;) {
|
||||
// Calculate output value
|
||||
// If we still need to use the old values, calculate using delay buf
|
||||
// Otherwise, use normal buffer
|
||||
if (i < 7) {
|
||||
volk_32f_x2_dot_prod_32f(&outVal, &delay[i], INTERP_TAPS[(int)roundf(_mu * 128.0f)], 8);
|
||||
}
|
||||
else {
|
||||
volk_32f_x2_dot_prod_32f(&outVal, &_in->readBuf[i - 7], INTERP_TAPS[(int)roundf(_mu * 128.0f)], 8);
|
||||
}
|
||||
out.writeBuf[outCount] = outVal;
|
||||
|
||||
|
||||
// Cursed phase detect approximation (don't ask me how this approximation works)
|
||||
phaseError = (DSP_STEP(lastOutput)*outVal) - (lastOutput*DSP_STEP(outVal));
|
||||
lastOutput = outVal;
|
||||
outCount++;
|
||||
|
||||
// Adjust the symbol rate using the phase error approximation and clamp
|
||||
// TODO: Branchless clamp
|
||||
_dynOmega = _dynOmega + (_gainOmega * phaseError);
|
||||
if (_dynOmega > omegaMax) { _dynOmega = omegaMax; }
|
||||
else if (_dynOmega < omegaMin) { _dynOmega = omegaMin; }
|
||||
|
||||
// Adjust the symbol phase according to the phase error approximation
|
||||
// It will now contain the phase delta needed to jump to the next symbol
|
||||
// Rounded step will contain the rounded number of symbols
|
||||
_mu = _mu + _dynOmega + (_muGain * phaseError);
|
||||
roundedStep = floor(_mu);
|
||||
|
||||
// Step to where the next symbol should be
|
||||
i += (int)roundedStep;
|
||||
|
||||
// Now that we've stepped to the next symbol, keep only the offset inside the symbol
|
||||
_mu -= roundedStep;
|
||||
}
|
||||
|
||||
nextOffset = i - count;
|
||||
|
||||
// Save the last 7 values for the next round
|
||||
memcpy(delay, &_in->readBuf[count - 7], 7);
|
||||
|
||||
_in->flush();
|
||||
if (!out.swap(outCount)) { return -1; }
|
||||
return count;
|
||||
}
|
||||
|
||||
stream<float> out;
|
||||
|
||||
private:
|
||||
int count;
|
||||
|
||||
// Delay buffer
|
||||
float delay[15];
|
||||
int nextOffset = 0;
|
||||
|
||||
// Configuration
|
||||
float _omega = 1.0f;
|
||||
float _muGain = 1.0f;
|
||||
float _gainOmega = 0.001f;
|
||||
float _omegaRelLimit = 0.005;
|
||||
|
||||
// Precalculated values
|
||||
float omegaMin = _omega + (_omega * _omegaRelLimit);
|
||||
float omegaMax = _omega + (_omega * _omegaRelLimit);
|
||||
|
||||
// Runtime adjusted
|
||||
float _dynOmega = _omega;
|
||||
float _mu = 0.5f;
|
||||
float lastOutput = 0.0f;
|
||||
|
||||
|
||||
stream<float>* _in;
|
||||
stream<complex_t>* _in;
|
||||
|
||||
};
|
||||
}
|
Reference in New Issue
Block a user