some speed improvements

This commit is contained in:
Christian Kroll 2012-09-04 01:45:29 +00:00
parent bc59653e9b
commit bcb5e146ce

View file

@ -11,6 +11,7 @@
#include <assert.h> #include <assert.h>
#include <stdint.h> #include <stdint.h>
#include <stddef.h>
#include "../config.h" #include "../config.h"
#include "../pixel.h" #include "../pixel.h"
#include "../util.h" #include "../util.h"
@ -346,38 +347,50 @@ static void fixDrawPattern(fixp_t const t_start,
for (fixp_t t = t_start; t < t_stop; t += t_delta) for (fixp_t t = t_start; t < t_stop; t += t_delta)
{ {
// For performance reasons the pattern is drawn to an off-screen buffer // For performance reasons the pattern is drawn to an off-screen buffer
// without distributing bits of higher planes down to lower ones. This // without distributing bits of higher planes down to lower ones.
// is done afterwards when the off-screen contents are copied to the ptrdiff_t nRowColOffset = 0;
// actual frame buffer. for (unsigned char y = 0; y < UNUM_ROWS; ++y)
for (unsigned char y = 0; y < NUM_ROWS; ++y)
{ {
for (unsigned char x = 0; x < (LINEBYTES * 8u); ++x) for (unsigned char x = 0; x < (LINEBYTES * 8u); ++x)
{ {
pOffScreen[fpPattern(x, y, t, r)][y][x / 8] |= shl_table[x % 8]; // Since multidimensional subscript expressions are rather
// expensive, we just resolve the first dimension (which
// represents the plane) and add an offset that correlates to
// the currently processed row and column.
*(&pOffScreen[fpPattern(x, y, t, r)][0][0] + nRowColOffset) |=
shl_table[x % 8u];
// increment the offset after completion of a byte
if ((x % 8u) == 7u)
{
nRowColOffset++;
}
} }
} }
// last byte of the frame buffer // one byte behind the frame buffer
unsigned char *pPixmap = unsigned char *pPixmap =
&pixmap[NUMPLANE - 1][NUM_ROWS - 1][LINEBYTES - 1]; (&pixmap[NUMPLANE - 1][NUM_ROWS - 1][LINEBYTES - 1]) + 1;
// last byte of the off-screen buffer // one byte behind the off-screen buffer
unsigned char *pOffscreenDistHigh = unsigned char *pOffscreenDistHigh =
&pOffScreen[NUMPLANE][NUM_ROWS - 1][LINEBYTES - 1]; (&pOffScreen[NUMPLANE][NUM_ROWS - 1][LINEBYTES - 1]) + 1;
// last byte of the second last plane of the off-screen buffer // one byte behind the second last plane of the off-screen buffer
unsigned char *pOffscreenDistLow = unsigned char *pOffscreenDistLow =
&pOffScreen[NUMPLANE - 1][NUM_ROWS - 1][LINEBYTES - 1]; (&pOffScreen[NUMPLANE - 1][NUM_ROWS - 1][LINEBYTES - 1]) + 1;
// Here we transcribe the off-screen contents to the actual frame buffer // Here we transcribe the off-screen contents to the actual frame buffer
// by distributing down 8 bits in parallel per iteration. We start at // by distributing down 8 bits in parallel per iteration. We start at
// the end of both buffers and move backwards through their space. // the end of both buffers and move backwards through their space.
// The pre-decrement operator is used so that GCC utilizes the AVR's
// built-in pre-decrement variants of the "ld" and "st" instructions.
while (pPixmap >= (unsigned char *)pixmap) // stop at the beginning while (pPixmap >= (unsigned char *)pixmap) // stop at the beginning
{ {
// actually draw off-screen contents // actually draw off-screen contents
*(pPixmap--) = *pOffscreenDistHigh; *(--pPixmap) = *(--pOffscreenDistHigh);
// distribute bits down to the next lower plane // distribute bits down to the next lower plane
*(pOffscreenDistLow--) |= *pOffscreenDistHigh; *(--pOffscreenDistLow) |= *pOffscreenDistHigh;
// clear already drawn off-screen contents // clear already drawn off-screen contents
*(pOffscreenDistHigh--) = 0; *pOffscreenDistHigh = 0;
} }
// wait a moment to ensure that the current frame is visible // wait a moment to ensure that the current frame is visible
@ -447,10 +460,10 @@ static unsigned char fixAnimPlasma(unsigned char const x,
{ {
p->fFunc2CosArg = NUM_ROWS * fixCos(t) + fixScaleUp(NUM_ROWS); p->fFunc2CosArg = NUM_ROWS * fixCos(t) + fixScaleUp(NUM_ROWS);
p->fFunc2SinArg = NUM_COLS * fixSin(t) + fixScaleUp(NUM_COLS); p->fFunc2SinArg = NUM_COLS * fixSin(t) + fixScaleUp(NUM_COLS);
} for (unsigned char i = NUM_COLS; i--;)
if (y == 0) {
{ p->fFunc1[i] = fixSin(fixMul(fixScaleUp(i), fPlasmaX) + t);
p->fFunc1[x] = fixSin(fixMul(fixScaleUp(x), fPlasmaX) + t); }
} }
fixp_t const fFunc2 = fixSin(fixMul(fixDist(fixScaleUp(x), fixScaleUp(y), fixp_t const fFunc2 = fixSin(fixMul(fixDist(fixScaleUp(x), fixScaleUp(y),
@ -458,7 +471,7 @@ static unsigned char fixAnimPlasma(unsigned char const x,
uint8_t const nRes = fixScaleDown(fixDiv(fixMul(p->fFunc1[x] + fFunc2 + uint8_t const nRes = fixScaleDown(fixDiv(fixMul(p->fFunc1[x] + fFunc2 +
fixScaleUp(2), fixScaleUp(NUMPLANE - 1)), fixScaleUp(2))); fixScaleUp(2), fixScaleUp(NUMPLANE - 1)), fixScaleUp(2)));
assert (nRes <= 3); assert (nRes <= NUMPLANE);
return nRes; return nRes;
} }