The patch below fixes the problem by open-coding the ntohl() call and
combining it with a byte-by-byte load of the 32-bit word.
static inline void LOAD_OP(int I, u32 *W, const u8 *input)
{
/*
 * LOAD_OP - store the I-th big-endian 32-bit word of @input into W[I].
 *
 * @I:     index of the 32-bit word to load (and of the slot in @W to fill)
 * @W:     destination array of 32-bit words
 * @input: source bytes
 *
 * The word is assembled one byte at a time, most significant byte first.
 * This replaces the former ntohl(((u32 *)input)[I]) expression, so @input
 * is never dereferenced through a u32 pointer and the result does not
 * depend on the host byte order.
 *
 * Word I occupies bytes 4*I .. 4*I+3 of @input, exactly as the old
 * ((u32 *)input)[I] indexing did; indexing the bytes with plain I would
 * read overlapping, wrong data for every I > 0.
 */
static inline void LOAD_OP(int I, u32 *W, const u8 *input)
{
	const u8 *p = input + 4 * I;

	W[I] = ((u32) p[0] << 24)
	     | ((u32) p[1] << 16)
	     | ((u32) p[2] << 8)
	     | ((u32) p[3]);
}
static inline void BLEND_OP(int I, u32 *W)
}
static inline void BLEND_OP(int I, u32 *W)