Rotate the image by entire strips instead of doing it pixel by pixel.
This seems to result in a performance gain of about 50%.
Closes #12739.
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66309 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
}
unsigned char *data = image.GetData();
}
unsigned char *data = image.GetData();
- const unsigned char *source_data = M_IMGDATA->m_data;
unsigned char *target_data;
unsigned char *target_data;
- for (long j = 0; j < height; j++)
+ // We rotate the image in 21-pixel (63-byte) wide strips
+ // to make better use of CPU cache <-> memory transfers.
+ // (Note: while much better than single-pixel "strips",
+ // our vertical strips will still generally straddle cache lines.)
+ for (long ii = 0; ii < width; )
- for (long i = 0; i < width; i++)
+ long next_ii = wxMin(ii + 21, width);
+
+ for (long j = 0; j < height; j++)
- if ( clockwise )
- {
- target_data = data + (((i+1)*height) - j - 1)*3;
- }
- else
+ const unsigned char *source_data
+ = M_IMGDATA->m_data + (j*width + ii)*3;
+
+ for (long i = ii; i < next_ii; i++)
- target_data = data + ((height*(width-1)) + j - (i*height))*3;
+ if ( clockwise )
+ {
+ target_data = data + (((i+1)*height) - j - 1)*3;
+ }
+ else
+ {
+ target_data = data + ((height*(width - 1 - i)) + j)*3;
+ }
+ memcpy( target_data, source_data, 3 );
+ source_data += 3;
- memcpy( target_data, source_data, 3 );
- source_data += 3;
}
const unsigned char *source_alpha = M_IMGDATA->m_alpha;
}
const unsigned char *source_alpha = M_IMGDATA->m_alpha;