aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Přemysl Eric Janouch <p@janouch.name> 2021-07-08 19:07:42 +0200
committer: Přemysl Eric Janouch <p@janouch.name> 2021-07-08 19:14:26 +0200
commit: 20c8385f2e3fc00305396a3aaa74cb1d30cecc8f (patch)
tree: 901d006d872495fabeda5bfcd6ff34e3e0ca36d8
parent: fa4443a3cee593611b1720282927e12e4c8c691f (diff)
download: nncmpp-20c8385f2e3fc00305396a3aaa74cb1d30cecc8f.tar.gz
nncmpp-20c8385f2e3fc00305396a3aaa74cb1d30cecc8f.tar.xz
nncmpp-20c8385f2e3fc00305396a3aaa74cb1d30cecc8f.zip
Spectrum analyser: optimise the x:16:2 case
nncmpp CPU usage went from 2 to 1.7 percent, a 15% improvement. Sort of worth it, given that it's a constant load. The assembly certainly looks nicer.
-rw-r--r-- nncmpp.c 29
1 files changed, 21 insertions, 8 deletions
diff --git a/nncmpp.c b/nncmpp.c
index 485b269..b422298 100644
--- a/nncmpp.c
+++ b/nncmpp.c
@@ -638,13 +638,13 @@ spectrum_decode_8 (struct spectrum *s, int sample)
{
size_t n = s->useful_bins;
float *data = s->data + n;
- int8_t *p = (int8_t *) s->buffer + sample * n * s->channels;
- while (n--)
+ for (int8_t *p = (int8_t *) s->buffer + sample * n * s->channels;
+ n--; p += s->channels)
{
int32_t acc = 0;
for (int ch = 0; ch < s->channels; ch++)
- acc += *p++;
- *data++ = (float) acc / -INT8_MIN / s->channels;
+ acc += p[ch];
+ *data++ = (float) acc / s->channels / -INT8_MIN;
}
}
@@ -653,16 +653,25 @@ spectrum_decode_16 (struct spectrum *s, int sample)
{
size_t n = s->useful_bins;
float *data = s->data + n;
- int16_t *p = (int16_t *) s->buffer + sample * n * s->channels;
- while (n--)
+ for (int16_t *p = (int16_t *) s->buffer + sample * n * s->channels;
+ n--; p += s->channels)
{
int32_t acc = 0;
for (int ch = 0; ch < s->channels; ch++)
- acc += *p++;
- *data++ = (float) acc / -INT16_MIN / s->channels;
+ acc += p[ch];
+ *data++ = (float) acc / s->channels / -INT16_MIN;
}
}
+static void
+spectrum_decode_16_2 (struct spectrum *s, int sample)
+{
+ size_t n = s->useful_bins;
+ float *data = s->data + n;
+ for (int16_t *p = (int16_t *) s->buffer + sample * n * 2; n--; p += 2)
+ *data++ = ((int32_t) p[0] + p[1]) / 2. / -INT16_MIN;
+}
+
// - - Spectrum analysis - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static const char *spectrum_bars[] =
@@ -803,6 +812,10 @@ spectrum_init (struct spectrum *s, char *format, int bars, struct error **e)
if (s->bits == 8) s->decode = spectrum_decode_8;
if (s->bits == 16) s->decode = spectrum_decode_16;
+ // Micro-optimize to achieve some peace of mind; it's weak but measurable
+ if (s->bits == 16 && s->channels == 2)
+ s->decode = spectrum_decode_16_2;
+
s->buffer_size = s->samples * s->useful_bins * s->bits / 8 * s->channels;
s->buffer = xcalloc (1, s->buffer_size);