- paddb mm0, mm1 // add (Prev_row/2) to Avg for each byte
- // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry
- movq mm1, mm3 // now use mm1 for getting LBCarrys
- pand mm1, mm2 // get LBCarrys for each byte where both
- // lsb's were == 1 (Only valid for active group)
- psrlq mm2, 1 // divide raw bytes by 2
- pand mm2, mm4 // clear invalid bit 7 of each byte
- paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
- pand mm2, mm6 // Leave only Active Group 1 bytes to add to Avg
- paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
- // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry
- psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 2 & 3
- movq mm2, mm0 // mov updated Raws to mm2
- psllq mm2, ShiftBpp // shift data to position correctly
- movq mm1, mm3 // now use mm1 for getting LBCarrys
- pand mm1, mm2 // get LBCarrys for each byte where both
- // lsb's were == 1 (Only valid for active group)
- psrlq mm2, 1 // divide raw bytes by 2
- pand mm2, mm4 // clear invalid bit 7 of each byte
- paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
- pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
- paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
-
- // Add rdd active group (Raw(x-bpp)/2) to Average with LBCarry
- psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 4 & 5
- movq mm2, mm0 // mov updated Raws to mm2
- psllq mm2, ShiftBpp // shift data to position correctly
- // Data only needs to be shifted once here to
- // get the correct x-bpp offset.
- movq mm1, mm3 // now use mm1 for getting LBCarrys
- pand mm1, mm2 // get LBCarrys for each byte where both
- // lsb's were == 1 (Only valid for active group)
- psrlq mm2, 1 // divide raw bytes by 2
- pand mm2, mm4 // clear invalid bit 7 of each byte
- paddb mm2, mm1 // add LBCarrys to (Raw(x-bpp)/2) for each byte
- pand mm2, mm6 // Leave only Active Group 2 bytes to add to Avg
- paddb mm0, mm2 // add (Raw/2) + LBCarrys to Avg for each Active byte
-
- // Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry
- psllq mm6, ShiftBpp // shift the mm6 mask to cover bytes 6 & 7
- movq mm2, mm0 // mov updated Raws to mm2
- psllq mm2, ShiftBpp // shift data to position correctly
- // Data only needs to be shifted once here to
- // get the correct x-bpp offset.
+ paddb mm0, mm1 /* add (Prev_row/2) to Avg for each byte */
+ /* Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry */
+ movq mm1, mm3 /* now use mm1 for getting LBCarrys */
+ pand mm1, mm2 /* get LBCarrys for each byte where both */
+ /* lsb's were == 1 (Only valid for active group) */
+ psrlq mm2, 1 /* divide raw bytes by 2 */
+ pand mm2, mm4 /* clear invalid bit 7 of each byte */
+ paddb mm2, mm1 /* add LBCarrys to (Raw(x-bpp)/2) for each byte */
+ pand mm2, mm6 /* Leave only Active Group 1 bytes to add to Avg */
+ paddb mm0, mm2 /* add (Raw/2) + LBCarrys to Avg for each Active byte */
+ /* Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry */
+ psllq mm6, ShiftBpp /* shift the mm6 mask to cover bytes 2 & 3 */
+ movq mm2, mm0 /* mov updated Raws to mm2 */
+ psllq mm2, ShiftBpp /* shift data to position correctly */
+ movq mm1, mm3 /* now use mm1 for getting LBCarrys */
+ pand mm1, mm2 /* get LBCarrys for each byte where both */
+ /* lsb's were == 1 (Only valid for active group) */
+ psrlq mm2, 1 /* divide raw bytes by 2 */
+ pand mm2, mm4 /* clear invalid bit 7 of each byte */
+ paddb mm2, mm1 /* add LBCarrys to (Raw(x-bpp)/2) for each byte */
+ pand mm2, mm6 /* Leave only Active Group 2 bytes to add to Avg */
+ paddb mm0, mm2 /* add (Raw/2) + LBCarrys to Avg for each Active byte */
+
+ /* Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry */
+ psllq mm6, ShiftBpp /* shift the mm6 mask to cover bytes 4 & 5 */
+ movq mm2, mm0 /* mov updated Raws to mm2 */
+ psllq mm2, ShiftBpp /* shift data to position correctly */
+ /* Data only needs to be shifted once here to */
+ /* get the correct x-bpp offset. */
+ movq mm1, mm3 /* now use mm1 for getting LBCarrys */
+ pand mm1, mm2 /* get LBCarrys for each byte where both */
+ /* lsb's were == 1 (Only valid for active group) */
+ psrlq mm2, 1 /* divide raw bytes by 2 */
+ pand mm2, mm4 /* clear invalid bit 7 of each byte */
+ paddb mm2, mm1 /* add LBCarrys to (Raw(x-bpp)/2) for each byte */
+ pand mm2, mm6 /* Leave only Active Group 3 bytes to add to Avg */
+ paddb mm0, mm2 /* add (Raw/2) + LBCarrys to Avg for each Active byte */
+
+ /* Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry */
+ psllq mm6, ShiftBpp /* shift the mm6 mask to cover bytes 6 & 7 */
+ movq mm2, mm0 /* mov updated Raws to mm2 */
+ psllq mm2, ShiftBpp /* shift data to position correctly */
+ /* Data only needs to be shifted once here to */
+ /* get the correct x-bpp offset. */