14 #include "../stdafx.h" 
   15 #include "../video/video_driver.hpp" 
   16 #include "../table/sprites.h" 
   17 #include "32bpp_anim_sse4.hpp" 
   20 #include "../safeguards.h" 
   // File-scope (static) instance of the factory type FBlitter_32bppSSE4_Anim.
   // NOTE(review): presumably constructing this object is what makes the SSE4 animated
   // blitter available to the rest of the program; the factory class is declared
   // elsewhere, so confirm there.
   23 static FBlitter_32bppSSE4_Anim iFBlitter_32bppSSE4_Anim;
 
   // Opens a region bracketed by IGNORE_UNINITIALIZED_WARNING_START / _STOP.
   // NOTE(review): judging by the macro name this silences "maybe uninitialised"
   // compiler warnings for the template below; the macro is defined elsewhere.
   32 IGNORE_UNINITIALIZED_WARNING_START
 
   // Template header of the specialised drawing routine. The compile-time parameters are:
   //   mode        - the BlitterMode this instantiation handles,
   //   read_mode   - how the sprite line is read (compared against RM_WITH_MARGIN below),
   //   bt_last     - parity hint for the last pixel of a line (BT_NONE / BT_ODD are tested below),
   //   translucent - passed as true/false by the dispatcher depending on SF_TRANSLUCENT,
   //   animated    - passed as false by the dispatcher when the sprite has SF_NO_ANIM.
   // The function signature line itself is not visible in this excerpt.
   33 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last, 
bool translucent, 
bool animated>
 
   // Remap table supplied with the blit parameters; indexed by a MapValue's m byte further down.
   36   const byte * 
const remap = bp->
remap;
 
   // First animation-buffer entry for this blit: the pixel distance of bp->dst from the
   // start of the screen buffer, plus bp->top rows of anim_buf_width entries, plus bp->left.
   38   uint16 *anim_line = this->anim_buf + ((uint32 *)bp->
dst - (uint32 *)_screen.dst_ptr) + bp->
top * this->anim_buf_width + bp->
left;
 
   // Number of pixels to draw per line; starts as bp->width and is narrowed per line
   // in the RM_WITH_MARGIN case.
   39   int effective_width = bp->
width;
 
   // Sprite payload reinterpreted as the SSE sprite layout, and the per-zoom info record.
   42   const Blitter_32bppSSE_Base::SpriteData * 
const sd = (
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite;
 
   43   const SpriteInfo * 
const si = &sd->infos[zoom];
 
   // First MapValue row to draw: starts at data[mv_offset] and skips bp->skip_top rows
   // of si->sprite_width entries each.
   44   const MapValue *src_mv_line = (
const MapValue *) &sd->data[si->mv_offset] + bp->
skip_top * si->sprite_width;
 
   // First colour row to draw: starts at data[sprite_offset] and skips bp->skip_top rows;
   // the row stride here is in bytes (si->sprite_line_size), hence the byte-pointer cast.
   45   const Colour *src_rgba_line = (
const Colour *) ((
const byte *) &sd->data[si->sprite_offset] + bp->
skip_top * si->sprite_line_size);
 
   // The body of this branch is not visible in this excerpt.
   47   if (read_mode != RM_WITH_MARGIN) {
 
   51   const MapValue *src_mv = src_mv_line;
 
   // Constants kept in SSE registers for the blend/darken helpers called in the line loop.
   54   const __m128i a_cm        = ALPHA_CONTROL_MASK;
 
   55   const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
 
   56   const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
 
   // Outer loop: one iteration per destination line, counting down from bp->height.
   58   for (
int y = bp->
height; y != 0; y--) {
 
   // Pixel data starts META_LENGTH entries into the colour row; the entries before it
   // (src_rgba_line[0] and src_rgba_line[1]) are read as per-line metadata below.
   60     const Colour *src = src_rgba_line + META_LENGTH;
 
   62     uint16 *anim = anim_line;
 
   // Margin mode: skip the pixel count stored in src_rgba_line[0].data on the left and
   // shrink effective_width so that no more than the remaining pixels are drawn.
   // (The declaration of dst is not visible in this excerpt.)
   64     if (read_mode == RM_WITH_MARGIN) {
 
   // Margin mode is only instantiated with bt_last == BT_NONE.
   65       assert(bt_last == BT_NONE); 
 
   // Advance all three cursors past the left margin.
   66       anim += src_rgba_line[0].
data;
 
   67       src += src_rgba_line[0].
data;
 
   68       dst += src_rgba_line[0].
data;
 
   // width_diff: how many sprite columns lie outside the requested blit width.
   70       const int width_diff = si->sprite_width - bp->
width;
 
   71       effective_width = bp->
width - (int) src_rgba_line[0].data;
 
   // delta_diff: the part of the value in src_rgba_line[1].data that exceeds width_diff;
   // only a positive excess reduces the width further.
   72       const int delta_diff = (int) src_rgba_line[1].data - width_diff;
 
   73       const int new_width = effective_width - delta_diff;
 
   74       effective_width = delta_diff > 0 ? new_width : effective_width;
 
   // Nothing left to draw on this line: jump to the per-line advance code.
   75       if (effective_width <= 0) 
goto next_line;
 
   // Scalar loop over effective_width pixels. The conditions guarding the two stores
   // below and the enclosing case label are not visible in this excerpt.
   81           for (uint x = (uint) effective_width; x > 0; x--) {
 
   // Copy the MapValue verbatim (read as one 16-bit word) into the animation buffer.
   84                 *anim = *(
const uint16*) src_mv;
 
   // Palette-animated indices (m >= PALETTE_ANIM_START) are resolved through the current
   // palette and brightness-adjusted with v; every other pixel takes the stored colour.
   85                 *dst = (src_mv->m >= 
PALETTE_ANIM_START) ? AdjustBrightneSSE(this->LookupColourInPalette(src_mv->m), src_mv->v) : src->
data;
 
   // The MapValue cursor only advances in the animated instantiation.
   91             if (animated) src_mv++;
 
   // Two-pixels-per-iteration loop (effective_width / 2 iterations) followed by handling
   // of one trailing pixel. The labels used here are prefixed "bmno_".
   // NOTE(review): that prefix suggests this is the BM_NORMAL branch; the case label and
   // several guarding conditions are not visible in this excerpt - confirm.
   99         for (uint x = (uint) effective_width/2; x != 0; x--) {
 
   // mvX2 holds two consecutive MapValues in one 32-bit word: bits 0-7 are read as m0,
   // bits 8-15 as the brightness for pixel 0, bits 16-23 as m1, bits 24-31 as the
   // brightness for pixel 1 (see the shifts below).
  100           uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
 
   // Load two source and two destination colours (64 bits each).
  101           __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
 
  102           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
 
  106             const byte m0 = mvX2;
 
   // Replacement colour for pixel 0: RGB from the palette entry m0, alpha kept from the
   // source pixel, then brightness-adjusted and inserted into the low lane of srcABCD.
  108               const Colour c0 = (this->LookupColourInPalette(m0).data & 0x00FFFFFF) | (src[0].data & 0xFF000000);
 
  109               InsertFirstUint32(AdjustBrightneSSE(c0, (byte) (mvX2 >> 8)).data, srcABCD);
 
  111             const byte m1 = mvX2 >> 16;
 
   // Same for pixel 1, inserted into the second lane.
  113               const Colour c1 = (this->LookupColourInPalette(m1).data & 0x00FFFFFF) | (src[1].data & 0xFF000000);
 
  114               InsertSecondUint32(AdjustBrightneSSE(c1, (byte) (mvX2 >> 24)).data, srcABCD);
 
   // The alpha of each of the two source pixels decides which anim-buffer update and
   // which store path (full opacity / blend / full transparency) is taken.
  118             const byte a0 = src[0].
a;
 
  119             const byte a1 = src[1].
a;
 
   // Write both animation entries with a single 32-bit store.
  123                 *(uint32*) anim = mvX2;
 
  124                 goto bmno_full_opacity;
 
  126               anim01 = (uint16) mvX2;
 
  127             } 
else if (a0 == 0) {
 
  129                 goto bmno_full_transparency;
 
   // Only an opaque second pixel records its MapValue.
  131                 if (a1 == 255) anim[1] = (uint16) (mvX2 >> 16);
 
  132                 goto bmno_alpha_blend;
 
  136               if (a1 == 255) anim01 |= mvX2 & 0xFFFF0000;
 
  137               *(uint32*) anim = anim01;
 
  139               anim[0] = (uint16) anim01;
 
   // Any source pixel with non-zero alpha clears its animation entry on this path.
  142             if (src[0].a) anim[0] = 0;
 
  143             if (src[1].a) anim[1] = 0;
 
   // Blend the two source pixels over the destination and write both pixels back.
  148           srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
  150           _mm_storel_epi64((__m128i *) dst, srcABCD);
 
  151 bmno_full_transparency:
 
   // Trailing single pixel: taken when the width is odd in BT_NONE instantiations, or
   // unconditionally in BT_ODD instantiations.
  158         if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
 
  160           } 
else if (src->
a == 255) {
 
   // Opaque pixel: record the MapValue and store the colour directly, resolving
   // palette-animated indices through the palette.
  161             *anim = *(
const uint16*) src_mv;
 
  162             *dst = (src_mv->m >= 
PALETTE_ANIM_START) ? AdjustBrightneSSE(LookupColourInPalette(src_mv->m), src_mv->v) : *src;
 
   // Otherwise blend one pixel through the SSE helper, using either the palette-derived
   // colour or the stored colour as source (the selecting condition is not visible here).
  166             __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
 
  168               Colour colour = AdjustBrightneSSE(LookupColourInPalette(src_mv->m), src_mv->v);
 
  170               srcABCD = _mm_cvtsi32_si128(colour.
data);
 
  172               srcABCD = _mm_cvtsi32_si128(src->
data);
 
  174             dst->
data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm));
 
  // Two-pixels-per-iteration loop that routes the m byte of each MapValue through the
  // remap table, followed by handling of one trailing pixel. Labels are prefixed "bmcr_".
  // NOTE(review): that prefix suggests this is the BM_COLOUR_REMAP branch; the case label
  // is not visible in this excerpt - confirm.
  180         for (uint x = (uint) effective_width / 2; x != 0; x--) {
 
  // Two MapValues in one 32-bit word, plus two source and two destination colours.
  181           uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
 
  182           __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
 
  183           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
 
  // m0 / m1: the m bytes of pixel 0 and pixel 1; r0 / r1: their remapped palette indices.
  186           const uint m0 = (byte) mvX2;
 
  187           const uint r0 = remap[m0];
 
  188           const uint m1 = (byte) (mvX2 >> 16);
 
  189           const uint r1 = remap[m1];
 
  // The mask 0x00FF00FF selects both m bytes, so this branch runs when at least one of
  // the two pixels has a non-zero m.
  //
  // CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) declares m_colour, initialised to
  // m_colour_init, and then overrides it: when the remapped index r is non-zero it
  // becomes the palette colour of r (RGB) combined with the source alpha; when m itself
  // is zero it becomes the unmodified source colour.
  //
  // Two uses of the macro follow: one working on 64-bit integers (srcs/dsts) and one
  // working on a two-element remapped_src array.
  // NOTE(review): these look like alternative variants selected by a preprocessor
  // condition that is not visible in this excerpt - confirm.
  190           if (mvX2 & 0x00FF00FF) {
 
  191             #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \ 
  193               Colour m_colour = m_colour_init; \ 
  195               const Colour srcm = (Colour) (m_src); \ 
  196               const uint m = (byte) (m_m); \ 
  197               const uint r = remap[m]; \ 
  198               const Colour cmap = (this->LookupColourInPalette(r).data & 0x00FFFFFF) | (srcm.data & 0xFF000000); \ 
  199               m_colour = r == 0 ? m_colour : cmap; \ 
  200               m_colour = m != 0 ? m_colour : srcm; \ 
  203             uint64 srcs = _mm_cvtsi128_si64(srcABCD);
 
  // The destination pixels are only needed as the initial colour in the animated case.
  205             if (animated) dsts = _mm_cvtsi128_si64(dstABCD);
 
  206             uint64 remapped_src = 0;
 
  207             CMOV_REMAP(c0, animated ? dsts : 0, srcs, mvX2);
 
  208             remapped_src = c0.
data;
 
  209             CMOV_REMAP(c1, animated ? dsts >> 32 : 0, srcs >> 32, mvX2 >> 16);
 
  210             remapped_src |= (uint64) c1.
data << 32;
 
  211             srcABCD = _mm_cvtsi64_si128(remapped_src);
 
  214             CMOV_REMAP(c0, animated ? _mm_cvtsi128_si32(dstABCD) : 0, _mm_cvtsi128_si32(srcABCD), mvX2);
 
  215             remapped_src[0] = c0.
data;
 
  216             CMOV_REMAP(c1, animated ? dst[1] : 0, src[1], mvX2 >> 16);
 
  217             remapped_src[1] = c1.
data;
 
  218             srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src);
 
  // Skip the brightness adjustment when both brightness bytes equal 0x80.
  221             if ((mvX2 & 0xFF00FF00) != 0x80008000) srcABCD = AdjustBrightnessOfTwoPixels(srcABCD, mvX2);
 
  // Animation-buffer update: anim01 starts with only the two brightness bytes of mvX2;
  // the remapped indices are merged in depending on the alpha of each source pixel.
  226             const byte a0 = src[0].
a;
 
  227             const byte a1 = src[1].
a;
 
  228             uint32 anim01 = mvX2 & 0xFF00FF00;
 
  232                 *(uint32*) anim = anim01 | (r1 << 16);
 
  233                 goto bmcr_full_opacity;
 
  235             } 
else if (a0 == 0) {
 
  237                 goto bmcr_full_transparency;
 
  240                   anim[1] = r1 | (anim01 >> 16);
 
  242                 goto bmcr_alpha_blend;
 
  // Only an opaque second pixel contributes its remapped index.
  246               if (a1 == 255) anim01 |= r1 << 16;
 
  247               *(uint32*) anim = anim01;
 
  249               anim[0] = (uint16) anim01;
 
  // Any source pixel with non-zero alpha clears its animation entry on this path.
  252             if (src[0].a) anim[0] = 0;
 
  253             if (src[1].a) anim[1] = 0;
 
  // Blend over the destination and write both pixels back.
  258           srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
  260           _mm_storel_epi64((__m128i *) dst, srcABCD);
 
  261 bmcr_full_transparency:
 
  // Trailing single pixel: odd width in BT_NONE instantiations, or always for BT_ODD.
  268         if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
 
  // A fully transparent pixel needs no work.
  271           if (src->
a == 0) 
break;
 
  273             const uint r = remap[src_mv->m];
 
  // Record remapped index + brightness only for opaque pixels in animated
  // instantiations; otherwise the animation entry is cleared.
  274             *anim = (animated && src->
a == 255) ? r | ((uint16) src_mv->v << 8 ) : 0;
 
  276               Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v);
 
  278                 *dst = remapped_colour;
 
  // Not stored directly: carry the source alpha and blend as a single pixel.
  280                 remapped_colour.
a = src->
a;
 
  281                 srcABCD = _mm_cvtsi32_si128(remapped_colour.
data);
 
  282                 goto bmcr_alpha_blend_single;
 
  287             srcABCD = _mm_cvtsi32_si128(src->
data);
 
  289 bmcr_alpha_blend_single:
 
  290               __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
 
  291               srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
  293             dst->
data = _mm_cvtsi128_si32(srcABCD);
 
  // Darkening pass: processes bp->width / 2 pixel pairs with DarkenTwoPixels and then one
  // trailing pixel. Note that this loop uses bp->width, not effective_width.
  // NOTE(review): this is presumably the BM_TRANSPARENT branch; the case label is not
  // visible in this excerpt - confirm.
  300         for (uint x = (uint) bp->
width / 2; x > 0; x--) {
 
  301           __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
 
  302           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
 
  303           _mm_storel_epi64((__m128i *) dst, DarkenTwoPixels(srcABCD, dstABCD, a_cm, tr_nom_base));
 
  // Clear the animation entries of the two pixels just handled when their source alpha
  // is non-zero. The negative indices imply the cursors were already advanced by two in
  // lines that are not visible in this excerpt.
  307           if (src[-2].a) anim[-2] = 0;
 
  308           if (src[-1].a) anim[-1] = 0;
 
  // Trailing single pixel: odd bp->width in BT_NONE instantiations, or always for BT_ODD.
  311         if ((bt_last == BT_NONE && bp->
width & 1) || bt_last == BT_ODD) {
 
  312           __m128i srcABCD = _mm_cvtsi32_si128(src->
data);
 
  313           __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
 
  314           dst->
data = _mm_cvtsi128_si32(DarkenTwoPixels(srcABCD, dstABCD, a_cm, tr_nom_base));
 
  315           if (src[0].a) anim[0] = 0;
 
  // Scalar per-pixel loop over bp->width pixels.
  // NOTE(review): presumably the BM_CRASH_REMAP branch; the case label and some guarding
  // conditions are not visible in this excerpt - confirm.
  320         for (uint x = (uint) bp->
width; x > 0; x--) {
 
  // Pixels without a palette index (m == 0) are reduced to a single grey level g computed
  // by MakeDark from the source RGB and composed onto the destination with the source alpha.
  321           if (src_mv->m == 0) {
 
  323               uint8 g = MakeDark(src->r, src->g, src->b);
 
  324               *dst = ComposeColourRGBA(g, g, g, src->
a, *dst);
 
  // Pixels with a palette index go through the remap table; a remap result of 0 leaves
  // the destination untouched.
  328             uint r = remap[src_mv->m];
 
  329             if (r != 0) *dst = ComposeColourPANoCheck(this->AdjustBrightness(this->LookupColourInPalette(r), src_mv->v), src->
a, *dst);
 
  // Another scalar per-pixel loop over bp->width pixels; its body is not visible in this
  // excerpt.
  339         for (uint x = (uint) bp->
width; x > 0; x--) {
 
  // End-of-line bookkeeping: advance the three row cursors to the next line. The colour
  // row stride is in bytes (si->sprite_line_size), the destination stride is bp->pitch
  // and the animation buffer stride is anim_buf_width entries.
  354     src_rgba_line = (
const Colour*) ((
const byte*) src_rgba_line + si->sprite_line_size);
 
  355     dst_line += bp->
pitch;
 
  356     anim_line += this->anim_buf_width;
 
  // Closes the region opened by IGNORE_UNINITIALIZED_WARNING_START.
  359 IGNORE_UNINITIALIZED_WARNING_STOP
 
  // Runtime dispatcher: picks the template instantiation of Draw<mode, read_mode, bt_last,
  // translucent, animated> that matches the requested mode and the sprite's flags. The
  // function signature, the switch statement and the conditions choosing between
  // RM_WITH_SKIP and RM_WITH_MARGIN are not visible in this excerpt.
  //
  // In every pair below, SF_NO_ANIM selects the instantiation with animated == false.
  370   const Blitter_32bppSSE_Base::SpriteFlags sprite_flags = ((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags;
 
  // RM_WITH_SKIP variants: the parity of bp->width is turned into the bt_last template
  // argument so the trailing-pixel test is resolved at compile time.
  375         const BlockType bt_last = (BlockType) (bp->
width & 1);
 
  376         if (bt_last == BT_EVEN) {
 
  377           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true, false>(bp, zoom);
 
  378           else                           Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true, true>(bp, zoom);
 
  380           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true, false>(bp, zoom);
 
  381           else                           Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true, true>(bp, zoom);
 
  // RM_WITH_MARGIN variants: sprites flagged SF_TRANSLUCENT use translucent == true,
  // the others translucent == false.
  385         if (sprite_flags & SF_TRANSLUCENT) {
 
  386           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
 
  387           else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
 
  389           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false, false>(bp, zoom);
 
  390           else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false, true>(bp, zoom);
 
  // NOTE(review): this third pair always passes translucent == true; it looks like an
  // alternative build variant of the block above, selected by a preprocessor condition
  // that is not visible here - confirm.
  393         if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
 
  394         else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
 
  // Sprites flagged SF_NO_REMAP are drawn through the normal path instead.
  400       if (sprite_flags & SF_NO_REMAP) 
goto bm_normal;
 
  402         if (sprite_flags & SF_NO_ANIM) Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true, false>(bp, zoom);
 
  403         else                           Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true, true>(bp, zoom);
 
  405         if (sprite_flags & SF_NO_ANIM) Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
 
  406         else                           Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
 
  // The remaining modes each have a single instantiation (RM_NONE, BT_NONE).
  409     case BM_TRANSPARENT:  Draw<BM_TRANSPARENT, RM_NONE, BT_NONE, true, true>(bp, zoom); 
return;
 
  410     case BM_CRASH_REMAP:  Draw<BM_CRASH_REMAP, RM_NONE, BT_NONE, true, true>(bp, zoom); 
return;
 
  411     case BM_BLACK_REMAP:  Draw<BM_BLACK_REMAP, RM_NONE, BT_NONE, true, true>(bp, zoom); 
return;