/*
 * Automatically Tuned Linear Algebra Software v3.11.38
 * Copyright (C) 2012 R. Clint Whaley
 */
/*
 * Project header.  The macros this file uses but does not define itself
 * (e.g. ATL_asmdecor, ALIGN16, ALIGN8) presumably come from here --
 * TODO confirm.
 */
#include "atlas_asm.h"
/*
 * KB selects, at preprocessing time, how many K iterations of the fully
 * unrolled K-loop are emitted: each "#if KB > n" guard adds one more.
 * If the build does not supply KB it defaults to 0, in which case every
 * such guard is false and only the peeled first K iteration is assembled.
 */
#ifndef KB
   #define KB 0
#endif
/*
 * Integer registers used inside the innermost (K-) loop; these get
 * priority on the 1st 7 regs (presumably the legacy registers that need
 * no REX prefix -- TODO confirm intent).
 *
 *   pA     A pointer.  Per the prototype A already arrives in %rcx, so it
 *          is used in place with no copy on entry.
 *   pB     B pointer, copied out of %r8 on entry.
 *   i256   the constant 256.
 *   i768, i1280, i1792   constant multiples of 256, built with lea.
 *
 * Both pointers are biased by +128 bytes during setup so that a signed
 * 1-byte displacement covers the first 256 bytes.  The i* registers are
 * then used as index registers (optionally scaled by 2, 4 or 8) so that
 * most operands of the unrolled loop keep a 1-byte displacement.
 *
 * %rdx and %rsi carry the K and nnu arguments on entry: nnu is copied
 * away before %rsi is reused, while K is overwritten without being read
 * (the K extent is fixed at assembly time by KB).  %rbx is callee-saved
 * and is stored by the prologue.
 */
#define pA      %rcx
#define pB      %rdi
#define i256    %rax
#define i768    %rdx   /* 3 * 256 */
#define i1280   %rsi   /* 5 * 256 */
#define i1792   %rbx   /* 7 * 256 */
/*
 * Second (N-) loop items get the next level of priority on good regs.
 *
 *   pC     C pointer, copied from %r9 on entry and biased by +128.
 *   pfA    address operand of the prefA prefetches.
 *   pfB    address operand of the prefB prefetches.
 *          On entry pfB = pAn and pfA = pBn; when pBn equals pB they are
 *          replaced by pfA = pAn and pfB = pCn.  Both are then biased by
 *          +128 like the data pointers.
 *   incPF  loaded with 2*12*8 bytes (mu*nu*sizeof); presumably the
 *          prefetch-pointer stride -- TODO confirm where it is applied.
 *   nnu    copied from %rsi (2nd argument); presumably the N-loop trip
 *          counter -- TODO confirm.
 *
 * %rbp and %r12 are callee-saved and are stored by the prologue.  %r8 and
 * %r9 are argument registers: each is copied out (to pB and pC) before it
 * is given its new role.
 */
#define pC      %rbp
#define pfA     %r12
#define pfB     %r8
#define incPF   %r9
#define nnu     %r10
/*
 * Outer- (M-) loop variables; any register will do for these.
 *
 *   nmu    copied from %rdi (1st argument) before %rdi is reused as pB.
 *   pB0    copy of the biased pB, taken once pointer setup is complete.
 *   nnu0   copy of nnu, taken on entry.
 *
 * pB0 and nnu0 presumably let pB and nnu be rewound for each M iteration
 * -- TODO confirm.  All three registers are callee-saved and are stored
 * by the prologue.
 */
#define nmu     %r13
#define pB0     %r14
#define nnu0    %r15
/*
 * Floating point registers.
 *
 *   a0        two packed doubles of A, loaded with movapd (so the A
 *             addresses must be 16-byte aligned).
 *   b0        scratch: one B element duplicated into both lanes by
 *             movddup, multiplied by a0, then added to an accumulator.
 *   rC0-rC11  twelve accumulators, one per B element consumed in a K
 *             step; with two lanes each this is the 2x12 (mu x nu) tile.
 *
 * In the peeled first K iteration the products are formed directly in
 * rC0-rC11, which initializes them without a separate zeroing pass.
 */
#define a0      %xmm0
#define b0      %xmm1
#define rC0     %xmm2
#define rC1     %xmm3
#define rC2     %xmm4
#define rC3     %xmm5
#define rC4     %xmm6
#define rC5     %xmm7
#define rC6     %xmm8
#define rC7     %xmm9
#define rC8     %xmm10
#define rC9     %xmm11
#define rC10    %xmm12
#define rC11    %xmm13
/*
                    rdi      rsi    rdx        rcx         r8        r9
void ATL_USERMM(SZT nmu, SZT nnu, SZT K, CTYPE *pA, CTYPE *pB, TYPE *pC,
                  8(%rsp)    16(%rsp)     24(%rsp)
                CTYPE *pAn, CTYPE *pBn, CTYPE *pCn);
 */

/*
 * Stack frame size in bytes: six 8-byte slots, one for each callee-saved
 * register the prologue stores (%rbp, %rbx, %r12-%r15).  The macro expands
 * to the bare text 6*8, so it should only be used where that is safe; the
 * prologue uses it as $FSIZE and as FSIZE+N(%rsp).
 */
#define FSIZE 6*8
/*
 * Prefetch instructions issued on pfA (prefA), pfB (prefB) and pC (prefC).
 * Each may be overridden from the build; the default is prefetcht0, except
 * that prefC becomes prefetchw when ATL_3DNow is defined.
 */
#ifndef prefA
   #define prefA prefetcht0
#endif
#ifndef prefB
   #define prefB prefetcht0
#endif
#ifndef prefC
   #ifdef ATL_3DNow
      #define prefC prefetchw
   #else
      #define prefC prefetcht0
   #endif
#endif
/*
 * BETCOP is the packed-double operation selected by the beta case: subpd
 * when BETAN1 is defined, addpd otherwise.  Presumably it combines the
 * computed tile with the existing C values -- TODO confirm at its use.
 */
#ifdef BETAN1
   #define BETCOP subpd
#else
   #define BETCOP addpd
#endif
/*
                    rdi      rsi    rdx        rcx         r8        r9
void ATL_USERMM(SZT nmu, SZT nnu, SZT K, CTYPE *pA, CTYPE *pB, TYPE *pC,
                  8(%rsp)    16(%rsp)     24(%rsp)
                CTYPE *pAn, CTYPE *pBn, CTYPE *pCn);
 */
.text
.global ATL_asmdecor(ATL_USERMM)
ALIGN16
ATL_asmdecor(ATL_USERMM):
/*
 * Save callee-saved iregs
 */
   sub $FSIZE, %rsp
   movq    %rbp, 0(%rsp)
   movq    %rbx, 8(%rsp)
   movq    %r12, 16(%rsp)
   movq    %r13, 24(%rsp)
   movq    %r14, 32(%rsp)
   movq    %r15, 40(%rsp)
/*
 * Load parameters
 */
   mov %rdi, nmu
   mov %rsi, nnu
   mov %r8, pB
   mov %r9, pC
   mov nnu, nnu0
   movq FSIZE+8(%rsp), pfB      /* pfB = pAn */
   movq FSIZE+16(%rsp), pfA     /* pf = pBn */
   cmp pfA, pB
   CMOVE pfB, pfA
   CMOVEq FSIZE+24(%rsp), pfB
   mov $2*12*8, incPF           /* incPF = mu*nu*sizeof */
/*
 * Extend range of 1-byte offsets  by starting at -128
 */
   sub $-128, pA
   sub $-128, pB
   sub $-128, pC
   sub $-128, pfA
   sub $-128, pfB
   movq pB, pB0
   mov $256, i256
   lea (i256, i256,2), i768
   lea (i256, i256,4), i1280
   lea (i256, i768,2), i1792
   ALIGN8
   .local MNLOOP
   MNLOOP:
/*
      .local NLOOP
      NLOOP:
*/
/*
 *       Peel first iteration of K loop to initialize rCx
 */
         movapd -128(pA), a0
         movddup -128(pB), rC0
         mulpd a0, rC0
            prefC -128(pC)
         movddup -120(pB), rC1
         mulpd a0, rC1
         movddup -112(pB), rC2
         mulpd a0, rC2
         movddup -104(pB), rC3
         mulpd a0, rC3
         movddup -96(pB), rC4
         mulpd a0, rC4
         movddup -88(pB), rC5
         mulpd a0, rC5
         movddup -80(pB), rC6
         mulpd a0, rC6
         movddup -72(pB), rC7
         mulpd a0, rC7
         movddup -64(pB), rC8
         mulpd a0, rC8
         movddup -56(pB), rC9
         mulpd a0, rC9
         movddup -48(pB), rC10
         mulpd a0, rC10
         movddup -40(pB), rC11
         mulpd a0, rC11
/*
 *       Fully unrolled K-loop
 */
         #if KB > 1
            movapd -112(pA), a0
            movddup -32(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -24(pB), b0
               prefC (pC)
            mulpd a0, b0
            addpd b0, rC1
            movddup -16(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -8(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup (pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 32(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 40(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 48(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 56(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 2
            movapd -96(pA), a0
            movddup 64(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 72(pB), b0
            prefA -128(pfA)
            mulpd a0, b0
            addpd b0, rC1
            movddup 80(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 88(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 96(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 104(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 112(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 120(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -128(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -120(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -112(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -104(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 3
            movapd -80(pA), a0
            movddup -96(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -88(pB,i256), b0
            prefA -64(pfA)
            mulpd a0, b0
            addpd b0, rC1
            movddup -80(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -72(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -64(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -56(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -48(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -40(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -32(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -24(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -16(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -8(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 4
            movapd -64(pA), a0
            movddup (pB,i256), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8(pB,i256), b0
            prefA (pfA)
            mulpd a0, b0
            addpd b0, rC1
            movddup 16(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 32(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 40(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 48(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 56(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 64(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 72(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 80(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 88(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 5
            movapd -48(pA), a0
            movddup 96(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 104(pB,i256), b0
            prefB -128(pfB)
            mulpd a0, b0
            addpd b0, rC1
            movddup 112(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 120(pB,i256), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -128(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -120(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -112(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -104(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -96(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -88(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -80(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -72(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 6
            movapd -32(pA), a0
            movddup -64(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -56(pB,i256,2), b0
            prefB -64(pfB)
            mulpd a0, b0
            addpd b0, rC1
            movddup -48(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -40(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -32(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -24(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -16(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -8(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup (pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 7
            movapd -16(pA), a0
            movddup 32(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 40(pB,i256,2), b0
            prefB (pfB)
            mulpd a0, b0
            addpd b0, rC1
            movddup 48(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 56(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 64(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 72(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 80(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 88(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 96(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 104(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 112(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 120(pB,i256,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 8
            movapd (pA), a0
            movddup -128(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -120(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -112(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -104(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -96(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -88(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -80(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -72(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -64(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -56(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -48(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -40(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 9
            movapd 16(pA), a0
            movddup -32(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -24(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -16(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -8(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup (pB,i768), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 32(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 40(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 48(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 56(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 10
            movapd 32(pA), a0
            movddup 64(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 72(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 80(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 88(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 96(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 104(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 112(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 120(pB,i768), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -128(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -120(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -112(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -104(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 11
            movapd 48(pA), a0
            movddup -96(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -88(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -80(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -72(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -64(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -56(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -48(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -40(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -32(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -24(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -16(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -8(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 12
            movapd 64(pA), a0
            movddup (pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 32(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 40(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 48(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 56(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 64(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 72(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 80(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 88(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 13
            movapd 80(pA), a0
            movddup 96(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 104(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 112(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 120(pB,i256,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -128(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -120(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -112(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -104(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -96(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -88(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -80(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -72(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 14
            movapd 96(pA), a0
            movddup -64(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -56(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -48(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -40(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -32(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -24(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -16(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -8(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup (pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 15
            movapd 112(pA), a0
            movddup 32(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 40(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 48(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 56(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 64(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 72(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 80(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 88(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 96(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 104(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 112(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 120(pB,i1280), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 16
            movapd -128(pA,i256), a0
            movddup -128(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -120(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -112(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -104(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -96(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -88(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -80(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -72(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -64(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -56(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -48(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -40(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 17
            movapd -112(pA,i256), a0
            movddup -32(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -24(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -16(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -8(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup (pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 32(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 40(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 48(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 56(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 18
            movapd -96(pA,i256), a0
            movddup 64(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 72(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 80(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 88(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 96(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 104(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 112(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 120(pB,i768,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -128(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -120(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -112(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -104(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 19
            movapd -80(pA,i256), a0
            movddup -96(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -88(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -80(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -72(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -64(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -56(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -48(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -40(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -32(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -24(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -16(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -8(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 20
            movapd -64(pA,i256), a0
            movddup (pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 32(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 40(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 48(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 56(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 64(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 72(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 80(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 88(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 21
            movapd -48(pA,i256), a0
            movddup 96(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 104(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 112(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 120(pB,i1792), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -128(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -120(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -112(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -104(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -96(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -88(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -80(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -72(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 22
            movapd -32(pA,i256), a0
            movddup -64(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -56(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -48(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -40(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -32(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -24(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -16(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -8(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup (pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 23
            movapd -16(pA,i256), a0
            movddup 32(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 40(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 48(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 56(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 64(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 72(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 80(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 88(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 96(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 104(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 112(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 120(pB,i256,8), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 24
            movapd (pA,i256), a0
            movddup 2176(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 2184(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 2192(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 2200(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 2208(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2216(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2224(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2232(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 2240(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 2248(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 2256(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 2264(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 25
            movapd 16(pA,i256), a0
            movddup 2272(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 2280(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 2288(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 2296(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 2304(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2312(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2320(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2328(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 2336(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 2344(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 2352(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 2360(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 26
/*
 *          Step compiled in only when KB > 26.  a0 = aligned 16 bytes at
 *          32(pA,i256); 12 B doubles are broadcast (movddup), multiplied by
 *          a0, and added into rC0..rC11 in order.  rC0..rC7 read
 *          2368(pB)..2424(pB); rC8..rC11 read via (pB,i1280,2).
 */
            movapd 32(pA,i256), a0
            movddup 2368(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 2376(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 2384(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 2392(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 2400(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2408(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2416(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2424(pB), b0
            mulpd a0, b0
            addpd b0, rC7
/*
 *          NOTE(review): if i1280 holds 1280 (HEAD comment: 5 * 256), then
 *          -128(pB,i1280,2) is pB+2432, continuing the 8-byte stride after
 *          2424(pB).  The indexed form presumably keeps the displacement in
 *          signed-8-bit range for a shorter encoding -- confirm against the
 *          register setup, which is outside this chunk.
 */
            movddup -128(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -120(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -112(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -104(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 27
/*
 *          Step compiled in only when KB > 27.  a0 = aligned 16 bytes at
 *          48(pA,i256); the 12 doubles at -96(pB,i1280,2)..-8(pB,i1280,2)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i1280 holds 1280 (HEAD comment: 5 * 256), so
 *          the base of the indexed form is pB+2560 -- confirm at the setup.
 */
            movapd 48(pA,i256), a0
            movddup -96(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -88(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -80(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -72(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -64(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -56(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -48(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -40(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -32(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -24(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -16(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -8(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 28
/*
 *          Step compiled in only when KB > 28.  a0 = aligned 16 bytes at
 *          64(pA,i256); the 12 doubles at (pB,i1280,2)..88(pB,i1280,2)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i1280 holds 1280 (HEAD comment: 5 * 256), so
 *          the base of the indexed form is pB+2560 -- confirm at the setup.
 */
            movapd 64(pA,i256), a0
            movddup (pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 32(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 40(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 48(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 56(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 64(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 72(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 80(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 88(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 29
/*
 *          Step compiled in only when KB > 29.  a0 = aligned 16 bytes at
 *          80(pA,i256); 12 B doubles are broadcast (movddup), multiplied by
 *          a0, and added into rC0..rC11 in order.  rC0..rC3 read
 *          96(pB,i1280,2)..120(pB,i1280,2); rC4..rC11 read
 *          2688(pB)..2744(pB).
 */
            movapd 80(pA,i256), a0
            movddup 96(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 104(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 112(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 120(pB,i1280,2), b0
            mulpd a0, b0
            addpd b0, rC3
/*
 *          NOTE(review): if i1280 holds 1280 (HEAD comment: 5 * 256), then
 *          120(pB,i1280,2) is pB+2680 and 2688(pB) is the next double.  The
 *          switch back to absolute displacements presumably happens because
 *          the next offset (128) no longer fits a signed 8-bit displacement
 *          -- confirm against the register setup outside this chunk.
 */
            movddup 2688(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2696(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2704(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2712(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 2720(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 2728(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 2736(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 2744(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 30
/*
 *          Step compiled in only when KB > 30.  a0 = aligned 16 bytes at
 *          96(pA,i256); the 12 doubles at 2752(pB)..2840(pB) (8-byte stride)
 *          are each duplicated into both lanes of b0 by movddup, multiplied
 *          by a0, and added into rC0..rC11 in order.
 */
            movapd 96(pA,i256), a0
            movddup 2752(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 2760(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 2768(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 2776(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 2784(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2792(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2800(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2808(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 2816(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 2824(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 2832(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 2840(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 31
/*
 *          Step compiled in only when KB > 31.  a0 = aligned 16 bytes at
 *          112(pA,i256); the 12 doubles at 2848(pB)..2936(pB) (8-byte stride)
 *          are each duplicated into both lanes of b0 by movddup, multiplied
 *          by a0, and added into rC0..rC11 in order.
 */
            movapd 112(pA,i256), a0
            movddup 2848(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 2856(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 2864(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 2872(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 2880(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 2888(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 2896(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 2904(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 2912(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 2920(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 2928(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 2936(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 32
/*
 *          Step compiled in only when KB > 32.  a0 = aligned 16 bytes at
 *          -128(pA,i256,2); the 12 doubles at -128(pB,i768,4)..-40(pB,i768,4)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i256 holds 256 and i768 holds 768 (HEAD
 *          comment: 3 * 256), making the indexed bases pA+512 and pB+3072 --
 *          confirm against the register setup outside this chunk.
 */
            movapd -128(pA,i256,2), a0
            movddup -128(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -120(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -112(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -104(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -96(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -88(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -80(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -72(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -64(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -56(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -48(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -40(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 33
/*
 *          Step compiled in only when KB > 33.  a0 = aligned 16 bytes at
 *          -112(pA,i256,2); the 12 doubles at -32(pB,i768,4)..56(pB,i768,4)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i768 holds 768 (HEAD comment: 3 * 256), so
 *          the base of the indexed form is pB+3072 -- confirm at the setup.
 */
            movapd -112(pA,i256,2), a0
            movddup -32(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -24(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -16(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -8(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup (pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 32(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 40(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 48(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 56(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 34
/*
 *          Step compiled in only when KB > 34.  a0 = aligned 16 bytes at
 *          -96(pA,i256,2); 12 B doubles are broadcast (movddup), multiplied
 *          by a0, and added into rC0..rC11 in order.  rC0..rC7 read
 *          64(pB,i768,4)..120(pB,i768,4); rC8..rC11 read 3200(pB)..3224(pB).
 */
            movapd -96(pA,i256,2), a0
            movddup 64(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 72(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 80(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 88(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 96(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 104(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 112(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 120(pB,i768,4), b0
            mulpd a0, b0
            addpd b0, rC7
/*
 *          NOTE(review): if i768 holds 768 (HEAD comment: 3 * 256), then
 *          120(pB,i768,4) is pB+3192 and 3200(pB) is the next double.  The
 *          switch back to absolute displacements presumably happens because
 *          the next offset (128) no longer fits a signed 8-bit displacement
 *          -- confirm against the register setup outside this chunk.
 */
            movddup 3200(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3208(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3216(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3224(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 35
/*
 *          Step compiled in only when KB > 35.  a0 = aligned 16 bytes at
 *          -80(pA,i256,2); the 12 doubles at 3232(pB)..3320(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -80(pA,i256,2), a0
            movddup 3232(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3240(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3248(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3256(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 3264(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 3272(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 3280(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 3288(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 3296(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3304(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3312(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3320(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 36
/*
 *          Step compiled in only when KB > 36.  a0 = aligned 16 bytes at
 *          -64(pA,i256,2); the 12 doubles at 3328(pB)..3416(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -64(pA,i256,2), a0
            movddup 3328(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3336(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3344(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3352(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 3360(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 3368(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 3376(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 3384(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 3392(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3400(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3408(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3416(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 37
/*
 *          Step compiled in only when KB > 37.  a0 = aligned 16 bytes at
 *          -48(pA,i256,2); 12 B doubles are broadcast (movddup), multiplied
 *          by a0, and added into rC0..rC11 in order.  rC0..rC3 read
 *          3424(pB)..3448(pB); rC4..rC11 read
 *          -128(pB,i1792,2)..-72(pB,i1792,2).
 */
            movapd -48(pA,i256,2), a0
            movddup 3424(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3432(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3440(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3448(pB), b0
            mulpd a0, b0
            addpd b0, rC3
/*
 *          NOTE(review): if i1792 holds 1792 (HEAD comment: 7 * 256), then
 *          -128(pB,i1792,2) is pB+3456, continuing the 8-byte stride after
 *          3448(pB).  The indexed form presumably keeps the displacement in
 *          signed-8-bit range for a shorter encoding -- confirm against the
 *          register setup, which is outside this chunk.
 */
            movddup -128(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -120(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -112(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -104(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup -96(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup -88(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup -80(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup -72(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 38
/*
 *          Step compiled in only when KB > 38.  a0 = aligned 16 bytes at
 *          -32(pA,i256,2); the 12 doubles at -64(pB,i1792,2)..24(pB,i1792,2)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i1792 holds 1792 (HEAD comment: 7 * 256), so
 *          the base of the indexed form is pB+3584 -- confirm at the setup.
 */
            movapd -32(pA,i256,2), a0
            movddup -64(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup -56(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup -48(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup -40(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup -32(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup -24(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup -16(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup -8(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup (pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 39
/*
 *          Step compiled in only when KB > 39.  a0 = aligned 16 bytes at
 *          -16(pA,i256,2); the 12 doubles at 32(pB,i1792,2)..120(pB,i1792,2)
 *          (8-byte stride) are each broadcast by movddup, multiplied by a0,
 *          and added into rC0..rC11 in order.
 *          NOTE(review): assumes i1792 holds 1792 (HEAD comment: 7 * 256), so
 *          the base of the indexed form is pB+3584 -- confirm at the setup.
 */
            movapd -16(pA,i256,2), a0
            movddup 32(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 40(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 48(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 56(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 64(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 72(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 80(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 88(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 96(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 104(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 112(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 120(pB,i1792,2), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 40
/*
 *          Step compiled in only when KB > 40.  a0 = aligned 16 bytes at
 *          (pA,i256,2); the 12 doubles at 3712(pB)..3800(pB) (8-byte stride)
 *          are each duplicated into both lanes of b0 by movddup, multiplied
 *          by a0, and added into rC0..rC11 in order.
 */
            movapd (pA,i256,2), a0
            movddup 3712(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3720(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3728(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3736(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 3744(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 3752(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 3760(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 3768(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 3776(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3784(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3792(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3800(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 41
/*
 *          Step compiled in only when KB > 41.  a0 = aligned 16 bytes at
 *          16(pA,i256,2); the 12 doubles at 3808(pB)..3896(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 16(pA,i256,2), a0
            movddup 3808(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3816(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3824(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3832(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 3840(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 3848(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 3856(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 3864(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 3872(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3880(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3888(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3896(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 42
/*
 *          Step compiled in only when KB > 42.  a0 = aligned 16 bytes at
 *          32(pA,i256,2); the 12 doubles at 3904(pB)..3992(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 32(pA,i256,2), a0
            movddup 3904(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 3912(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 3920(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 3928(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 3936(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 3944(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 3952(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 3960(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 3968(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 3976(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 3984(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 3992(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 43
/*
 *          Step compiled in only when KB > 43.  a0 = aligned 16 bytes at
 *          48(pA,i256,2); the 12 doubles at 4000(pB)..4088(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 48(pA,i256,2), a0
            movddup 4000(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4008(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4016(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4024(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4032(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4040(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4048(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4056(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4064(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4072(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4080(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4088(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 44
/*
 *          Step compiled in only when KB > 44.  a0 = aligned 16 bytes at
 *          64(pA,i256,2); the 12 doubles at 4096(pB)..4184(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 64(pA,i256,2), a0
            movddup 4096(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4104(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4112(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4120(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4128(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4136(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4144(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4152(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4160(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4168(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4176(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4184(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 45
/*
 *          Step compiled in only when KB > 45.  a0 = aligned 16 bytes at
 *          80(pA,i256,2); the 12 doubles at 4192(pB)..4280(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 80(pA,i256,2), a0
            movddup 4192(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4200(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4208(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4216(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4224(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4232(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4240(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4248(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4256(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4264(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4272(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4280(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 46
/*
 *          Step compiled in only when KB > 46.  a0 = aligned 16 bytes at
 *          96(pA,i256,2); the 12 doubles at 4288(pB)..4376(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 96(pA,i256,2), a0
            movddup 4288(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4296(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4304(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4312(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4320(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4328(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4336(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4344(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4352(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4360(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4368(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4376(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 47
/*
 *          Step compiled in only when KB > 47.  a0 = aligned 16 bytes at
 *          112(pA,i256,2); the 12 doubles at 4384(pB)..4472(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd 112(pA,i256,2), a0
            movddup 4384(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4392(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4400(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4408(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4416(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4424(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4432(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4440(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4448(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4456(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4464(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4472(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 48
/*
 *          Step compiled in only when KB > 48.  a0 = aligned 16 bytes at
 *          -128(pA,i768); the 12 doubles at 4480(pB)..4568(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 *          NOTE(review): the A operand changes index register to i768 here;
 *          if i768 holds 768 (HEAD comment: 3 * 256) this is pA+640 --
 *          confirm against the register setup outside this chunk.
 */
            movapd -128(pA,i768), a0
            movddup 4480(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4488(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4496(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4504(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4512(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4520(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4528(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4536(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4544(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4552(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4560(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4568(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 49
/*
 *          Step compiled in only when KB > 49.  a0 = aligned 16 bytes at
 *          -112(pA,i768); the 12 doubles at 4576(pB)..4664(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -112(pA,i768), a0
            movddup 4576(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4584(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4592(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4600(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4608(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4616(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4624(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4632(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4640(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4648(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4656(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4664(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 50
/*
 *          Step compiled in only when KB > 50.  a0 = aligned 16 bytes at
 *          -96(pA,i768); the 12 doubles at 4672(pB)..4760(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -96(pA,i768), a0
            movddup 4672(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4680(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4688(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4696(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4704(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4712(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4720(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4728(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4736(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4744(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4752(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4760(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 51
/*
 *          Step compiled in only when KB > 51.  a0 = aligned 16 bytes at
 *          -80(pA,i768); the 12 doubles at 4768(pB)..4856(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -80(pA,i768), a0
            movddup 4768(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4776(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4784(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4792(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4800(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4808(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4816(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4824(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4832(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4840(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4848(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4856(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 52
/*
 *          Step compiled in only when KB > 52.  a0 = aligned 16 bytes at
 *          -64(pA,i768); the 12 doubles at 4864(pB)..4952(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -64(pA,i768), a0
            movddup 4864(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4872(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4880(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4888(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4896(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 4904(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 4912(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 4920(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 4928(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 4936(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 4944(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 4952(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 53
/*
 *          Step compiled in only when KB > 53.  a0 = aligned 16 bytes at
 *          -48(pA,i768); the 12 doubles at 4960(pB)..5048(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -48(pA,i768), a0
            movddup 4960(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 4968(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 4976(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 4984(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 4992(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5000(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5008(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5016(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5024(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5032(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5040(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5048(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 54
/*
 *          Step compiled in only when KB > 54.  a0 = aligned 16 bytes at
 *          -32(pA,i768); the 12 doubles at 5056(pB)..5144(pB) (8-byte
 *          stride) are each duplicated into both lanes of b0 by movddup,
 *          multiplied by a0, and added into rC0..rC11 in order.
 */
            movapd -32(pA,i768), a0
            movddup 5056(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5064(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5072(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5080(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5088(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5096(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5104(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5112(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5120(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5128(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5136(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5144(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 55
            /*
             * Unrolled K-loop step 55, assembled only when KB > 55.
             * a0 = 16 bytes of A loaded by movapd from -16(pA,i768); each double
             * at 5152(pB)..5240(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 55*16-128 (A, if i768 holds 768) and
             * 55*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -16(pA,i768), a0
            movddup 5152(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5160(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5168(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5176(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5184(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5192(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5200(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5208(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5216(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5224(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5232(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5240(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 56
            /*
             * Unrolled K-loop step 56, assembled only when KB > 56.
             * a0 = 16 bytes of A loaded by movapd from (pA,i768); each double
             * at 5248(pB)..5336(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 56*16-128 (A, if i768 holds 768) and
             * 56*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd (pA,i768), a0
            movddup 5248(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5256(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5264(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5272(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5280(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5288(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5296(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5304(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5312(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5320(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5328(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5336(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 57
            /*
             * Unrolled K-loop step 57, assembled only when KB > 57.
             * a0 = 16 bytes of A loaded by movapd from 16(pA,i768); each double
             * at 5344(pB)..5432(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 57*16-128 (A, if i768 holds 768) and
             * 57*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 16(pA,i768), a0
            movddup 5344(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5352(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5360(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5368(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5376(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5384(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5392(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5400(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5408(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5416(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5424(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5432(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 58
            /*
             * Unrolled K-loop step 58, assembled only when KB > 58.
             * a0 = 16 bytes of A loaded by movapd from 32(pA,i768); each double
             * at 5440(pB)..5528(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 58*16-128 (A, if i768 holds 768) and
             * 58*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 32(pA,i768), a0
            movddup 5440(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5448(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5456(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5464(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5472(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5480(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5488(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5496(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5504(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5512(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5520(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5528(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 59
            /*
             * Unrolled K-loop step 59, assembled only when KB > 59.
             * a0 = 16 bytes of A loaded by movapd from 48(pA,i768); each double
             * at 5536(pB)..5624(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 59*16-128 (A, if i768 holds 768) and
             * 59*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 48(pA,i768), a0
            movddup 5536(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5544(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5552(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5560(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5568(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5576(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5584(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5592(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5600(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5608(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5616(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5624(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 60
            /*
             * Unrolled K-loop step 60, assembled only when KB > 60.
             * a0 = 16 bytes of A loaded by movapd from 64(pA,i768); each double
             * at 5632(pB)..5720(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 60*16-128 (A, if i768 holds 768) and
             * 60*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 64(pA,i768), a0
            movddup 5632(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5640(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5648(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5656(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5664(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5672(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5680(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5688(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5696(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5704(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5712(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5720(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 61
            /*
             * Unrolled K-loop step 61, assembled only when KB > 61.
             * a0 = 16 bytes of A loaded by movapd from 80(pA,i768); each double
             * at 5728(pB)..5816(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 61*16-128 (A, if i768 holds 768) and
             * 61*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 80(pA,i768), a0
            movddup 5728(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5736(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5744(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5752(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5760(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5768(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5776(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5784(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5792(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5800(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5808(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5816(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 62
            /*
             * Unrolled K-loop step 62, assembled only when KB > 62.
             * a0 = 16 bytes of A loaded by movapd from 96(pA,i768); each double
             * at 5824(pB)..5912(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 62*16-128 (A, if i768 holds 768) and
             * 62*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 96(pA,i768), a0
            movddup 5824(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5832(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5840(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5848(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5856(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5864(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5872(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5880(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5888(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5896(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 5904(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 5912(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 63
            /*
             * Unrolled K-loop step 63, assembled only when KB > 63.
             * a0 = 16 bytes of A loaded by movapd from 112(pA,i768); each double
             * at 5920(pB)..6008(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 63*16-128 (A, if i768 holds 768) and
             * 63*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 112(pA,i768), a0
            movddup 5920(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 5928(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 5936(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 5944(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 5952(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 5960(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 5968(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 5976(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 5984(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 5992(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6000(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6008(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 64
            /*
             * Unrolled K-loop step 64, assembled only when KB > 64.
             * a0 = 16 bytes of A loaded by movapd from -128(pA,i256,4); each double
             * at 6016(pB)..6104(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 64*16-128 (A, if i256 holds 256) and
             * 64*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -128(pA,i256,4), a0
            movddup 6016(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6024(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6032(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6040(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6048(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6056(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6064(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6072(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6080(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6088(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6096(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6104(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 65
            /*
             * Unrolled K-loop step 65, assembled only when KB > 65.
             * a0 = 16 bytes of A loaded by movapd from -112(pA,i256,4); each double
             * at 6112(pB)..6200(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 65*16-128 (A, if i256 holds 256) and
             * 65*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -112(pA,i256,4), a0
            movddup 6112(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6120(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6128(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6136(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6144(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6152(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6160(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6168(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6176(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6184(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6192(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6200(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 66
            /*
             * Unrolled K-loop step 66, assembled only when KB > 66.
             * a0 = 16 bytes of A loaded by movapd from -96(pA,i256,4); each double
             * at 6208(pB)..6296(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 66*16-128 (A, if i256 holds 256) and
             * 66*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -96(pA,i256,4), a0
            movddup 6208(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6216(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6224(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6232(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6240(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6248(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6256(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6264(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6272(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6280(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6288(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6296(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 67
            /*
             * Unrolled K-loop step 67, assembled only when KB > 67.
             * a0 = 16 bytes of A loaded by movapd from -80(pA,i256,4); each double
             * at 6304(pB)..6392(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 67*16-128 (A, if i256 holds 256) and
             * 67*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -80(pA,i256,4), a0
            movddup 6304(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6312(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6320(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6328(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6336(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6344(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6352(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6360(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6368(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6376(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6384(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6392(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 68
            /*
             * Unrolled K-loop step 68, assembled only when KB > 68.
             * a0 = 16 bytes of A loaded by movapd from -64(pA,i256,4); each double
             * at 6400(pB)..6488(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 68*16-128 (A, if i256 holds 256) and
             * 68*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -64(pA,i256,4), a0
            movddup 6400(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6408(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6416(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6424(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6432(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6440(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6448(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6456(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6464(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6472(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6480(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6488(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 69
            /*
             * Unrolled K-loop step 69, assembled only when KB > 69.
             * a0 = 16 bytes of A loaded by movapd from -48(pA,i256,4); each double
             * at 6496(pB)..6584(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 69*16-128 (A, if i256 holds 256) and
             * 69*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -48(pA,i256,4), a0
            movddup 6496(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6504(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6512(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6520(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6528(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6536(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6544(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6552(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6560(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6568(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6576(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6584(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 70
            /*
             * Unrolled K-loop step 70, assembled only when KB > 70.
             * a0 = 16 bytes of A loaded by movapd from -32(pA,i256,4); each double
             * at 6592(pB)..6680(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 70*16-128 (A, if i256 holds 256) and
             * 70*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -32(pA,i256,4), a0
            movddup 6592(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6600(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6608(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6616(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6624(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6632(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6640(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6648(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6656(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6664(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6672(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6680(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 71
            /*
             * Unrolled K-loop step 71, assembled only when KB > 71.
             * a0 = 16 bytes of A loaded by movapd from -16(pA,i256,4); each double
             * at 6688(pB)..6776(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 71*16-128 (A, if i256 holds 256) and
             * 71*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -16(pA,i256,4), a0
            movddup 6688(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6696(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6704(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6712(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6720(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6728(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6736(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6744(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6752(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6760(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6768(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6776(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 72
            /*
             * Unrolled K-loop step 72, assembled only when KB > 72.
             * a0 = 16 bytes of A loaded by movapd from (pA,i256,4); each double
             * at 6784(pB)..6872(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 72*16-128 (A, if i256 holds 256) and
             * 72*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd (pA,i256,4), a0
            movddup 6784(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6792(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6800(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6808(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6816(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6824(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6832(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6840(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6848(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6856(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6864(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6872(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 73
            /*
             * Unrolled K-loop step 73, assembled only when KB > 73.
             * a0 = 16 bytes of A loaded by movapd from 16(pA,i256,4); each double
             * at 6880(pB)..6968(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 73*16-128 (A, if i256 holds 256) and
             * 73*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 16(pA,i256,4), a0
            movddup 6880(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6888(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6896(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 6904(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 6912(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 6920(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 6928(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 6936(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 6944(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 6952(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 6960(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 6968(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 74
            /*
             * Unrolled K-loop step 74, assembled only when KB > 74.
             * a0 = 16 bytes of A loaded by movapd from 32(pA,i256,4); each double
             * at 6976(pB)..7064(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 74*16-128 (A, if i256 holds 256) and
             * 74*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 32(pA,i256,4), a0
            movddup 6976(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 6984(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 6992(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7000(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7008(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7016(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7024(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7032(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7040(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7048(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7056(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7064(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 75
            /*
             * Unrolled K-loop step 75, assembled only when KB > 75.
             * a0 = 16 bytes of A loaded by movapd from 48(pA,i256,4); each double
             * at 7072(pB)..7160(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 75*16-128 (A, if i256 holds 256) and
             * 75*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 48(pA,i256,4), a0
            movddup 7072(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7080(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7088(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7096(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7104(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7112(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7120(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7128(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7136(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7144(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7152(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7160(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 76
            /*
             * Unrolled K-loop step 76, assembled only when KB > 76.
             * a0 = 16 bytes of A loaded by movapd from 64(pA,i256,4); each double
             * at 7168(pB)..7256(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 76*16-128 (A, if i256 holds 256) and
             * 76*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 64(pA,i256,4), a0
            movddup 7168(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7176(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7184(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7192(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7200(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7208(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7216(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7224(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7232(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7240(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7248(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7256(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 77
            /*
             * Unrolled K-loop step 77, assembled only when KB > 77.
             * a0 = 16 bytes of A loaded by movapd from 80(pA,i256,4); each double
             * at 7264(pB)..7352(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 77*16-128 (A, if i256 holds 256) and
             * 77*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 80(pA,i256,4), a0
            movddup 7264(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7272(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7280(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7288(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7296(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7304(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7312(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7320(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7328(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7336(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7344(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7352(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 78
            /*
             * Unrolled K-loop step 78, assembled only when KB > 78.
             * a0 = 16 bytes of A loaded by movapd from 96(pA,i256,4); each double
             * at 7360(pB)..7448(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 78*16-128 (A, if i256 holds 256) and
             * 78*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 96(pA,i256,4), a0
            movddup 7360(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7368(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7376(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7384(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7392(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7400(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7408(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7416(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7424(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7432(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7440(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7448(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 79
            /*
             * Unrolled K-loop step 79, assembled only when KB > 79.
             * a0 = 16 bytes of A loaded by movapd from 112(pA,i256,4); each double
             * at 7456(pB)..7544(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 79*16-128 (A, if i256 holds 256) and
             * 79*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd 112(pA,i256,4), a0
            movddup 7456(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7464(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7472(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7480(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7488(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7496(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7504(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7512(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7520(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7528(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7536(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7544(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 80
            /*
             * Unrolled K-loop step 80, assembled only when KB > 80.
             * a0 = 16 bytes of A loaded by movapd from -128(pA,i1280); each double
             * at 7552(pB)..7640(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 80*16-128 (A, if i1280 holds 1280) and
             * 80*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -128(pA,i1280), a0
            movddup 7552(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7560(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7568(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7576(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7584(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7592(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7600(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7608(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7616(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7624(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7632(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7640(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 81
            /*
             * Unrolled K-loop step 81, assembled only when KB > 81.
             * a0 = 16 bytes of A loaded by movapd from -112(pA,i1280); each double
             * at 7648(pB)..7736(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 81*16-128 (A, if i1280 holds 1280) and
             * 81*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -112(pA,i1280), a0
            movddup 7648(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7656(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7664(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7672(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7680(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7688(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7696(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7704(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7712(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7720(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7728(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7736(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 82
            /*
             * Unrolled K-loop step 82, assembled only when KB > 82.
             * a0 = 16 bytes of A loaded by movapd from -96(pA,i1280); each double
             * at 7744(pB)..7832(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 82*16-128 (A, if i1280 holds 1280) and
             * 82*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -96(pA,i1280), a0
            movddup 7744(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7752(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7760(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7768(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7776(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7784(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7792(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7800(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7808(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7816(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7824(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7832(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 83
            /*
             * Unrolled K-loop step 83, assembled only when KB > 83.
             * a0 = 16 bytes of A loaded by movapd from -80(pA,i1280); each double
             * at 7840(pB)..7928(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 83*16-128 (A, if i1280 holds 1280) and
             * 83*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -80(pA,i1280), a0
            movddup 7840(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7848(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7856(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7864(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7872(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7880(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7888(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7896(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 7904(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 7912(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 7920(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 7928(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 84
            /*
             * Unrolled K-loop step 84, assembled only when KB > 84.
             * a0 = 16 bytes of A loaded by movapd from -64(pA,i1280); each double
             * at 7936(pB)..8024(pB) is broadcast to both lanes of b0 (movddup),
             * multiplied by a0, and added into its own accumulator rC0..rC11.
             * NOTE(review): offsets equal 84*16-128 (A, if i1280 holds 1280) and
             * 84*96-128 (B), so pA/pB presumably carry a 128-byte bias -- confirm.
             */
            movapd -64(pA,i1280), a0
            movddup 7936(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 7944(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 7952(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 7960(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 7968(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 7976(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 7984(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 7992(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8000(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8008(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8016(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8024(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 85
            /*
             * Step assembled only when KB > 85.
             * a0 = two packed doubles at -48(pA,i1280); each of the 12 doubles
             * at 8032..8120(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -48(pA,i1280), a0
            movddup 8032(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8040(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8048(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8056(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8064(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8072(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8080(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8088(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8096(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8104(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8112(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8120(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 86
            /*
             * Step assembled only when KB > 86.
             * a0 = two packed doubles at -32(pA,i1280); each of the 12 doubles
             * at 8128..8216(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -32(pA,i1280), a0
            movddup 8128(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8136(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8144(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8152(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8160(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8168(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8176(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8184(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8192(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8200(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8208(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8216(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 87
            /*
             * Step assembled only when KB > 87.
             * a0 = two packed doubles at -16(pA,i1280); each of the 12 doubles
             * at 8224..8312(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -16(pA,i1280), a0
            movddup 8224(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8232(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8240(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8248(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8256(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8264(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8272(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8280(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8288(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8296(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8304(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8312(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 88
            /*
             * Step assembled only when KB > 88.
             * a0 = two packed doubles at (pA,i1280), i.e. no displacement; each
             * of the 12 doubles at 8320..8408(pB) is duplicated into b0,
             * multiplied by a0 and added into rC0..rC11 in order.
             */
            movapd (pA,i1280), a0
            movddup 8320(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8328(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8336(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8344(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8352(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8360(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8368(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8376(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8384(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8392(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8400(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8408(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 89
            /*
             * Step assembled only when KB > 89.
             * a0 = two packed doubles at 16(pA,i1280); each of the 12 doubles
             * at 8416..8504(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 16(pA,i1280), a0
            movddup 8416(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8424(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8432(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8440(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8448(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8456(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8464(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8472(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8480(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8488(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8496(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8504(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 90
            /*
             * Step assembled only when KB > 90.
             * a0 = two packed doubles at 32(pA,i1280); each of the 12 doubles
             * at 8512..8600(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 32(pA,i1280), a0
            movddup 8512(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8520(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8528(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8536(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8544(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8552(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8560(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8568(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8576(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8584(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8592(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8600(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 91
            /*
             * Step assembled only when KB > 91.
             * a0 = two packed doubles at 48(pA,i1280); each of the 12 doubles
             * at 8608..8696(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 48(pA,i1280), a0
            movddup 8608(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8616(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8624(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8632(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8640(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8648(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8656(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8664(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8672(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8680(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8688(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8696(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 92
            /*
             * Step assembled only when KB > 92.
             * a0 = two packed doubles at 64(pA,i1280); each of the 12 doubles
             * at 8704..8792(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 64(pA,i1280), a0
            movddup 8704(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8712(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8720(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8728(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8736(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8744(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8752(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8760(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8768(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8776(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8784(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8792(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 93
            /*
             * Step assembled only when KB > 93.
             * a0 = two packed doubles at 80(pA,i1280); each of the 12 doubles
             * at 8800..8888(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 80(pA,i1280), a0
            movddup 8800(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8808(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8816(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8824(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8832(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8840(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8848(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8856(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8864(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8872(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8880(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8888(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 94
            /*
             * Step assembled only when KB > 94.
             * a0 = two packed doubles at 96(pA,i1280); each of the 12 doubles
             * at 8896..8984(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 96(pA,i1280), a0
            movddup 8896(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 8904(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 8912(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 8920(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 8928(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 8936(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 8944(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 8952(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 8960(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 8968(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 8976(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 8984(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 95
            /*
             * Step assembled only when KB > 95.
             * a0 = two packed doubles at 112(pA,i1280); each of the 12 doubles
             * at 8992..9080(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 112(pA,i1280), a0
            movddup 8992(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9000(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9008(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9016(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9024(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9032(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9040(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9048(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9056(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9064(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9072(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9080(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 96
            /*
             * Step assembled only when KB > 96.
             * a0 = two packed doubles at -128(pA,i768,2); each of the 12 doubles
             * at 9088..9176(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             * The A index changes here from i1280 to i768 scaled by 2 and the
             * displacement restarts at -128.
             * NOTE(review): presumably done so the displacement stays within
             * -128..127 -- confirm against the kernel generator.
             */
            movapd -128(pA,i768,2), a0
            movddup 9088(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9096(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9104(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9112(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9120(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9128(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9136(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9144(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9152(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9160(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9168(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9176(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 97
            /*
             * Step assembled only when KB > 97.
             * a0 = two packed doubles at -112(pA,i768,2); each of the 12 doubles
             * at 9184..9272(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -112(pA,i768,2), a0
            movddup 9184(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9192(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9200(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9208(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9216(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9224(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9232(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9240(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9248(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9256(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9264(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9272(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 98
            /*
             * Step assembled only when KB > 98.
             * a0 = two packed doubles at -96(pA,i768,2); each of the 12 doubles
             * at 9280..9368(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -96(pA,i768,2), a0
            movddup 9280(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9288(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9296(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9304(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9312(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9320(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9328(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9336(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9344(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9352(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9360(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9368(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 99
            /*
             * Step assembled only when KB > 99.
             * a0 = two packed doubles at -80(pA,i768,2); each of the 12 doubles
             * at 9376..9464(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -80(pA,i768,2), a0
            movddup 9376(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9384(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9392(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9400(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9408(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9416(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9424(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9432(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9440(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9448(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9456(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9464(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 100
            /*
             * Step assembled only when KB > 100.
             * a0 = two packed doubles at -64(pA,i768,2); each of the 12 doubles
             * at 9472..9560(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -64(pA,i768,2), a0
            movddup 9472(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9480(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9488(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9496(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9504(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9512(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9520(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9528(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9536(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9544(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9552(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9560(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 101
            /*
             * Step assembled only when KB > 101.
             * a0 = two packed doubles at -48(pA,i768,2); each of the 12 doubles
             * at 9568..9656(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -48(pA,i768,2), a0
            movddup 9568(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9576(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9584(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9592(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9600(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9608(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9616(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9624(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9632(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9640(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9648(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9656(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 102
            /*
             * Step assembled only when KB > 102.
             * a0 = two packed doubles at -32(pA,i768,2); each of the 12 doubles
             * at 9664..9752(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -32(pA,i768,2), a0
            movddup 9664(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9672(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9680(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9688(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9696(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9704(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9712(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9720(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9728(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9736(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9744(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9752(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 103
            /*
             * Step assembled only when KB > 103.
             * a0 = two packed doubles at -16(pA,i768,2); each of the 12 doubles
             * at 9760..9848(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -16(pA,i768,2), a0
            movddup 9760(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9768(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9776(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9784(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9792(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9800(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9808(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9816(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9824(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9832(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9840(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9848(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 104
            /*
             * Step assembled only when KB > 104.
             * a0 = two packed doubles at (pA,i768,2), i.e. no displacement; each
             * of the 12 doubles at 9856..9944(pB) is duplicated into b0,
             * multiplied by a0 and added into rC0..rC11 in order.
             */
            movapd (pA,i768,2), a0
            movddup 9856(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9864(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9872(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9880(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9888(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9896(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 9904(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 9912(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 9920(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 9928(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 9936(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 9944(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 105
            /*
             * Step assembled only when KB > 105.
             * a0 = two packed doubles at 16(pA,i768,2); each of the 12 doubles
             * at 9952..10040(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 16(pA,i768,2), a0
            movddup 9952(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 9960(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 9968(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 9976(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 9984(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 9992(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10000(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10008(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10016(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10024(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10032(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10040(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 106
            /*
             * Step assembled only when KB > 106.
             * a0 = two packed doubles at 32(pA,i768,2); each of the 12 doubles
             * at 10048..10136(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 32(pA,i768,2), a0
            movddup 10048(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10056(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10064(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10072(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10080(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10088(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10096(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10104(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10112(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10120(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10128(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10136(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 107
            /*
             * Step assembled only when KB > 107.
             * a0 = two packed doubles at 48(pA,i768,2); each of the 12 doubles
             * at 10144..10232(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 48(pA,i768,2), a0
            movddup 10144(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10152(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10160(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10168(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10176(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10184(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10192(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10200(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10208(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10216(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10224(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10232(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 108
            /*
             * Step assembled only when KB > 108.
             * a0 = two packed doubles at 64(pA,i768,2); each of the 12 doubles
             * at 10240..10328(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 64(pA,i768,2), a0
            movddup 10240(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10248(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10256(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10264(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10272(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10280(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10288(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10296(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10304(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10312(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10320(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10328(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 109
            /*
             * Step assembled only when KB > 109.
             * a0 = two packed doubles at 80(pA,i768,2); each of the 12 doubles
             * at 10336..10424(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 80(pA,i768,2), a0
            movddup 10336(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10344(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10352(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10360(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10368(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10376(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10384(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10392(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10400(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10408(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10416(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10424(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 110
            /*
             * Step assembled only when KB > 110.
             * a0 = two packed doubles at 96(pA,i768,2); each of the 12 doubles
             * at 10432..10520(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 96(pA,i768,2), a0
            movddup 10432(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10440(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10448(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10456(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10464(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10472(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10480(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10488(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10496(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10504(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10512(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10520(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 111
            /*
             * Step assembled only when KB > 111.
             * a0 = two packed doubles at 112(pA,i768,2); each of the 12 doubles
             * at 10528..10616(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd 112(pA,i768,2), a0
            movddup 10528(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10536(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10544(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10552(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10560(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10568(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10576(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10584(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10592(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10600(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10608(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10616(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 112
            /*
             * Step assembled only when KB > 112.
             * a0 = two packed doubles at -128(pA,i1792); each of the 12 doubles
             * at 10624..10712(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             * The A index changes here from i768 scaled by 2 to i1792 and the
             * displacement restarts at -128.
             * NOTE(review): presumably done so the displacement stays within
             * -128..127 -- confirm against the kernel generator.
             */
            movapd -128(pA,i1792), a0
            movddup 10624(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10632(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10640(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10648(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10656(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10664(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10672(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10680(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10688(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10696(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10704(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10712(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 113
            /*
             * Step assembled only when KB > 113.
             * a0 = two packed doubles at -112(pA,i1792); each of the 12 doubles
             * at 10720..10808(pB) is duplicated into b0, multiplied by a0 and
             * added into rC0..rC11 in order.
             */
            movapd -112(pA,i1792), a0
            movddup 10720(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10728(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10736(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10744(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10752(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10760(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10768(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10776(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10784(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10792(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10800(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10808(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 114
            /*
             * Unrolled K-loop step 114 (0-based index implied by the guard);
             * assembled only when KB > 114.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 96
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 10816+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -96(pA,i1792), a0
            movddup 10816(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10824(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10832(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10840(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10848(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10856(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10864(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10872(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10880(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10888(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10896(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 10904(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 115
            /*
             * Unrolled K-loop step 115 (0-based index implied by the guard);
             * assembled only when KB > 115.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 80
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 10912+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -80(pA,i1792), a0
            movddup 10912(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 10920(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 10928(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 10936(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 10944(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 10952(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 10960(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 10968(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 10976(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 10984(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 10992(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11000(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 116
            /*
             * Unrolled K-loop step 116 (0-based index implied by the guard);
             * assembled only when KB > 116.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 64
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11008+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -64(pA,i1792), a0
            movddup 11008(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11016(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11024(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11032(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11040(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11048(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11056(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11064(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11072(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11080(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11088(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11096(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 117
            /*
             * Unrolled K-loop step 117 (0-based index implied by the guard);
             * assembled only when KB > 117.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 48
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11104+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -48(pA,i1792), a0
            movddup 11104(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11112(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11120(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11128(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11136(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11144(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11152(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11160(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11168(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11176(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11184(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11192(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 118
            /*
             * Unrolled K-loop step 118 (0-based index implied by the guard);
             * assembled only when KB > 118.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 32
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11200+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -32(pA,i1792), a0
            movddup 11200(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11208(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11216(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11224(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11232(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11240(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11248(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11256(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11264(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11272(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11280(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11288(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 119
            /*
             * Unrolled K-loop step 119 (0-based index implied by the guard);
             * assembled only when KB > 119.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 - 16
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11296+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -16(pA,i1792), a0
            movddup 11296(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11304(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11312(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11320(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11328(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11336(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11344(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11352(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11360(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11368(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11376(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11384(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 120
            /*
             * Unrolled K-loop step 120 (0-based index implied by the guard);
             * assembled only when KB > 120.
             * a0 <- two packed doubles of A, aligned load from pA + i1792
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11392+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd (pA,i1792), a0
            movddup 11392(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11400(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11408(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11416(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11424(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11432(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11440(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11448(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11456(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11464(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11472(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11480(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 121
            /*
             * Unrolled K-loop step 121 (0-based index implied by the guard);
             * assembled only when KB > 121.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 16
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11488+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 16(pA,i1792), a0
            movddup 11488(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11496(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11504(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11512(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11520(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11528(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11536(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11544(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11552(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11560(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11568(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11576(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 122
            /*
             * Unrolled K-loop step 122 (0-based index implied by the guard);
             * assembled only when KB > 122.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 32
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11584+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 32(pA,i1792), a0
            movddup 11584(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11592(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11600(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11608(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11616(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11624(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11632(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11640(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11648(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11656(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11664(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11672(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 123
            /*
             * Unrolled K-loop step 123 (0-based index implied by the guard);
             * assembled only when KB > 123.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 48
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11680+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 48(pA,i1792), a0
            movddup 11680(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11688(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11696(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11704(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11712(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11720(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11728(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11736(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11744(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11752(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11760(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11768(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 124
            /*
             * Unrolled K-loop step 124 (0-based index implied by the guard);
             * assembled only when KB > 124.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 64
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11776+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 64(pA,i1792), a0
            movddup 11776(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11784(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11792(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11800(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11808(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11816(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11824(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11832(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11840(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11848(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11856(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11864(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 125
            /*
             * Unrolled K-loop step 125 (0-based index implied by the guard);
             * assembled only when KB > 125.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 80
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11872+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 80(pA,i1792), a0
            movddup 11872(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11880(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11888(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11896(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 11904(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 11912(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 11920(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 11928(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 11936(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 11944(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 11952(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 11960(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 126
            /*
             * Unrolled K-loop step 126 (0-based index implied by the guard);
             * assembled only when KB > 126.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 96
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 11968+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 96(pA,i1792), a0
            movddup 11968(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 11976(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 11984(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 11992(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12000(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12008(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12016(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12024(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12032(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12040(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12048(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12056(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 127
            /*
             * Unrolled K-loop step 127 (0-based index implied by the guard);
             * assembled only when KB > 127.
             * a0 <- two packed doubles of A, aligned load from pA + i1792 + 112
             * (i1792 is commented as 7 * 256 where it is #defined).
             * For j = 0..11: b0 <- the double at 12064+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 112(pA,i1792), a0
            movddup 12064(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12072(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12080(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12088(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12096(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12104(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12112(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12120(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12128(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12136(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12144(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12152(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 128
            /*
             * Unrolled K-loop step 128 (0-based index implied by the guard);
             * assembled only when KB > 128.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 128
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12160+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -128(pA,i256,8), a0
            movddup 12160(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12168(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12176(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12184(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12192(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12200(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12208(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12216(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12224(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12232(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12240(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12248(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 129
            /*
             * Unrolled K-loop step 129 (0-based index implied by the guard);
             * assembled only when KB > 129.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 112
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12256+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -112(pA,i256,8), a0
            movddup 12256(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12264(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12272(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12280(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12288(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12296(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12304(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12312(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12320(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12328(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12336(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12344(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 130
            /*
             * Unrolled K-loop step 130 (0-based index implied by the guard);
             * assembled only when KB > 130.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 96
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12352+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -96(pA,i256,8), a0
            movddup 12352(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12360(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12368(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12376(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12384(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12392(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12400(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12408(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12416(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12424(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12432(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12440(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 131
            /*
             * Unrolled K-loop step 131 (0-based index implied by the guard);
             * assembled only when KB > 131.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 80
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12448+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -80(pA,i256,8), a0
            movddup 12448(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12456(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12464(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12472(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12480(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12488(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12496(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12504(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12512(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12520(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12528(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12536(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 132
            /*
             * Unrolled K-loop step 132 (0-based index implied by the guard);
             * assembled only when KB > 132.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 64
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12544+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -64(pA,i256,8), a0
            movddup 12544(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12552(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12560(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12568(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12576(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12584(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12592(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12600(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12608(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12616(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12624(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12632(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 133
            /*
             * Unrolled K-loop step 133 (0-based index implied by the guard);
             * assembled only when KB > 133.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 48
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12640+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -48(pA,i256,8), a0
            movddup 12640(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12648(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12656(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12664(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12672(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12680(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12688(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12696(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12704(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12712(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12720(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12728(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 134
            /*
             * Unrolled K-loop step 134 (0-based index implied by the guard);
             * assembled only when KB > 134.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 32
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12736+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -32(pA,i256,8), a0
            movddup 12736(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12744(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12752(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12760(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12768(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12776(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12784(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12792(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12800(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12808(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12816(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12824(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 135
            /*
             * Unrolled K-loop step 135 (0-based index implied by the guard);
             * assembled only when KB > 135.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 - 16
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12832+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd -16(pA,i256,8), a0
            movddup 12832(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12840(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12848(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12856(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12864(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12872(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12880(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12888(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12896(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 12904(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 12912(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 12920(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 136
            /*
             * Unrolled K-loop step 136 (0-based index implied by the guard);
             * assembled only when KB > 136.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 12928+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd (pA,i256,8), a0
            movddup 12928(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 12936(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 12944(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 12952(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 12960(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 12968(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 12976(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 12984(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 12992(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13000(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13008(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13016(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 137
            /*
             * Unrolled K-loop step 137 (0-based index implied by the guard);
             * assembled only when KB > 137.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 16
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13024+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 16(pA,i256,8), a0
            movddup 13024(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13032(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13040(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13048(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13056(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13064(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13072(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13080(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13088(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13096(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13104(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13112(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 138
            /*
             * Unrolled K-loop step 138 (0-based index implied by the guard);
             * assembled only when KB > 138.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 32
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13120+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 32(pA,i256,8), a0
            movddup 13120(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13128(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13136(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13144(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13152(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13160(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13168(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13176(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13184(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13192(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13200(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13208(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 139
            /*
             * Unrolled K-loop step 139 (0-based index implied by the guard);
             * assembled only when KB > 139.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 48
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13216+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 48(pA,i256,8), a0
            movddup 13216(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13224(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13232(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13240(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13248(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13256(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13264(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13272(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13280(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13288(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13296(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13304(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 140
            /*
             * Unrolled K-loop step 140 (0-based index implied by the guard);
             * assembled only when KB > 140.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 64
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13312+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 64(pA,i256,8), a0
            movddup 13312(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13320(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13328(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13336(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13344(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13352(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13360(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13368(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13376(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13384(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13392(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13400(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 141
            /*
             * Unrolled K-loop step 141 (0-based index implied by the guard);
             * assembled only when KB > 141.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 80
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13408+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 80(pA,i256,8), a0
            movddup 13408(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13416(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13424(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13432(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13440(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13448(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13456(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13464(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13472(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13480(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13488(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13496(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 142
            /*
             * Unrolled K-loop step 142 (0-based index implied by the guard);
             * assembled only when KB > 142.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 96
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13504+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 96(pA,i256,8), a0
            movddup 13504(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13512(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13520(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13528(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13536(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13544(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13552(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13560(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13568(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13576(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13584(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13592(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 143
            /*
             * Unrolled K-loop step 143 (0-based index implied by the guard);
             * assembled only when KB > 143.
             * a0 <- two packed doubles of A, aligned load from pA + 8*i256 + 112
             * (NOTE(review): i256 presumably holds 256; it is set outside this
             * section -- confirm).
             * For j = 0..11: b0 <- the double at 13600+8*j(pB) duplicated into
             * both lanes, b0 *= a0, rCj += b0.
             */
            movapd 112(pA,i256,8), a0
            movddup 13600(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13608(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13616(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13624(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13632(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13640(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13648(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13656(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13664(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13672(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13680(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13688(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 144
            /*
             * Assembled only when KB > 144.  a0 = the two doubles at
             * 2176(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 13696..13784(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2176(pA), a0
            movddup 13696(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13704(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13712(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13720(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13728(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13736(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13744(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13752(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13760(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13768(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13776(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13784(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 145
            /*
             * Assembled only when KB > 145.  a0 = the two doubles at
             * 2192(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 13792..13880(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2192(pA), a0
            movddup 13792(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13800(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13808(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13816(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13824(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13832(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13840(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13848(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13856(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13864(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13872(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13880(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 146
            /*
             * Assembled only when KB > 146.  a0 = the two doubles at
             * 2208(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 13888..13976(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2208(pA), a0
            movddup 13888(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13896(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 13904(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 13912(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 13920(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 13928(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 13936(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 13944(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 13952(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 13960(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 13968(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 13976(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 147
            /*
             * Assembled only when KB > 147.  a0 = the two doubles at
             * 2224(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 13984..14072(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2224(pA), a0
            movddup 13984(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 13992(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14000(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14008(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14016(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14024(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14032(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14040(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14048(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14056(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14064(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14072(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 148
            /*
             * Assembled only when KB > 148.  a0 = the two doubles at
             * 2240(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14080..14168(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2240(pA), a0
            movddup 14080(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14088(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14096(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14104(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14112(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14120(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14128(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14136(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14144(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14152(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14160(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14168(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 149
            /*
             * Assembled only when KB > 149.  a0 = the two doubles at
             * 2256(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14176..14264(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2256(pA), a0
            movddup 14176(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14184(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14192(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14200(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14208(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14216(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14224(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14232(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14240(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14248(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14256(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14264(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 150
            /*
             * Assembled only when KB > 150.  a0 = the two doubles at
             * 2272(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14272..14360(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2272(pA), a0
            movddup 14272(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14280(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14288(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14296(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14304(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14312(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14320(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14328(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14336(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14344(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14352(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14360(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 151
            /*
             * Assembled only when KB > 151.  a0 = the two doubles at
             * 2288(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14368..14456(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2288(pA), a0
            movddup 14368(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14376(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14384(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14392(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14400(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14408(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14416(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14424(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14432(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14440(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14448(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14456(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 152
            /*
             * Assembled only when KB > 152.  a0 = the two doubles at
             * 2304(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14464..14552(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2304(pA), a0
            movddup 14464(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14472(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14480(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14488(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14496(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14504(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14512(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14520(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14528(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14536(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14544(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14552(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 153
            /*
             * Assembled only when KB > 153.  a0 = the two doubles at
             * 2320(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14560..14648(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2320(pA), a0
            movddup 14560(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14568(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14576(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14584(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14592(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14600(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14608(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14616(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14624(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14632(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14640(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14648(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 154
            /*
             * Assembled only when KB > 154.  a0 = the two doubles at
             * 2336(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14656..14744(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2336(pA), a0
            movddup 14656(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14664(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14672(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14680(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14688(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14696(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14704(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14712(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14720(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14728(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14736(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14744(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 155
            /*
             * Assembled only when KB > 155.  a0 = the two doubles at
             * 2352(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14752..14840(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2352(pA), a0
            movddup 14752(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14760(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14768(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14776(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14784(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14792(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14800(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14808(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14816(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14824(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14832(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14840(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 156
            /*
             * Assembled only when KB > 156.  a0 = the two doubles at
             * 2368(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14848..14936(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2368(pA), a0
            movddup 14848(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14856(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14864(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14872(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14880(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14888(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14896(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 14904(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 14912(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 14920(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 14928(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 14936(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 157
            /*
             * Assembled only when KB > 157.  a0 = the two doubles at
             * 2384(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 14944..15032(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2384(pA), a0
            movddup 14944(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 14952(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 14960(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 14968(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 14976(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 14984(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 14992(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15000(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15008(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15016(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15024(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15032(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 158
            /*
             * Assembled only when KB > 158.  a0 = the two doubles at
             * 2400(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 15040..15128(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2400(pA), a0
            movddup 15040(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15048(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15056(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15064(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15072(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15080(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15088(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15096(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15104(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15112(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15120(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15128(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 159
            /*
             * Assembled only when KB > 159.  a0 = the two doubles at
             * 2416(pA) (movapd: address must be 16-byte aligned).  Then, for
             * the 12 doubles at 15136..15224(pB) (stride 8): movddup copies
             * one into both halves of b0, mulpd scales it by a0, and addpd
             * accumulates the product into rC0..rC11 in order.
             */
            movapd 2416(pA), a0
            movddup 15136(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15144(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15152(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15160(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15168(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15176(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15184(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15192(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15200(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15208(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15216(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15224(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 160
            /*
             * Assembled only when KB > 160.  a0 = the two doubles at
             * pA + 2*i1280 - 128 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2432, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15232..15320(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -128(pA,i1280,2), a0
            movddup 15232(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15240(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15248(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15256(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15264(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15272(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15280(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15288(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15296(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15304(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15312(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15320(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 161
            /*
             * Assembled only when KB > 161.  a0 = the two doubles at
             * pA + 2*i1280 - 112 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2448, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15328..15416(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -112(pA,i1280,2), a0
            movddup 15328(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15336(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15344(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15352(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15360(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15368(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15376(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15384(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15392(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15400(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15408(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15416(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 162
            /*
             * Assembled only when KB > 162.  a0 = the two doubles at
             * pA + 2*i1280 - 96 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2464, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15424..15512(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -96(pA,i1280,2), a0
            movddup 15424(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15432(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15440(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15448(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15456(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15464(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15472(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15480(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15488(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15496(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15504(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15512(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 163
            /*
             * Assembled only when KB > 163.  a0 = the two doubles at
             * pA + 2*i1280 - 80 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2480, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15520..15608(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -80(pA,i1280,2), a0
            movddup 15520(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15528(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15536(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15544(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15552(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15560(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15568(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15576(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15584(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15592(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15600(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15608(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 164
            /*
             * Assembled only when KB > 164.  a0 = the two doubles at
             * pA + 2*i1280 - 64 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2496, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15616..15704(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -64(pA,i1280,2), a0
            movddup 15616(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15624(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15632(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15640(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15648(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15656(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15664(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15672(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15680(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15688(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15696(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15704(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 165
            /*
             * Assembled only when KB > 165.  a0 = the two doubles at
             * pA + 2*i1280 - 48 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2512, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15712..15800(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -48(pA,i1280,2), a0
            movddup 15712(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15720(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15728(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15736(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15744(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15752(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15760(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15768(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15776(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15784(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15792(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15800(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 166
            /*
             * Assembled only when KB > 166.  a0 = the two doubles at
             * pA + 2*i1280 - 32 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2528, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15808..15896(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -32(pA,i1280,2), a0
            movddup 15808(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15816(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15824(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15832(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15840(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15848(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15856(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15864(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15872(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15880(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15888(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15896(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 167
            /*
             * Assembled only when KB > 167.  a0 = the two doubles at
             * pA + 2*i1280 - 16 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2544, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 15904..15992(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd -16(pA,i1280,2), a0
            movddup 15904(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 15912(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 15920(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 15928(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 15936(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 15944(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 15952(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 15960(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 15968(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 15976(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 15984(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 15992(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 168
            /*
             * Assembled only when KB > 168.  a0 = the two doubles at
             * pA + 2*i1280 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2560, and
             * the indexed form presumably avoids a wide displacement --
             * confirm.
             * Then, for the 12 doubles at 16000..16088(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd (pA,i1280,2), a0
            movddup 16000(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16008(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16016(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16024(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16032(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16040(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16048(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16056(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16064(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16072(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16080(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16088(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 169
            /*
             * Assembled only when KB > 169.  a0 = the two doubles at
             * pA + 2*i1280 + 16 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2576, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 16096..16184(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd 16(pA,i1280,2), a0
            movddup 16096(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16104(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16112(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16120(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16128(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16136(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16144(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16152(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16160(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16168(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16176(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16184(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 170
            /*
             * Assembled only when KB > 170.  a0 = the two doubles at
             * pA + 2*i1280 + 32 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2592, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 16192..16280(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd 32(pA,i1280,2), a0
            movddup 16192(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16200(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16208(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16216(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16224(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16232(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16240(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16248(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16256(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16264(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16272(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16280(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 171
            /*
             * Assembled only when KB > 171.  a0 = the two doubles at
             * pA + 2*i1280 + 48 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2608, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 16288..16376(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd 48(pA,i1280,2), a0
            movddup 16288(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16296(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16304(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16312(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16320(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16328(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16336(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16344(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16352(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16360(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16368(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16376(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 172
            /*
             * Assembled only when KB > 172.  a0 = the two doubles at
             * pA + 2*i1280 + 64 (movapd: address must be 16-byte aligned).
             * NOTE(review): i1280 is loaded outside this view; if it holds
             * 1280 (5 * 256, per its #define comment), this is pA + 2624, and
             * the indexed form presumably keeps the displacement within one
             * byte -- confirm.
             * Then, for the 12 doubles at 16384..16472(pB) (stride 8):
             * movddup copies one into both halves of b0, mulpd scales it by
             * a0, and addpd accumulates the product into rC0..rC11 in order.
             */
            movapd 64(pA,i1280,2), a0
            movddup 16384(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16392(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16400(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16408(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16416(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16424(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16432(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16440(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16448(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16456(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16464(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16472(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 173
            /*
             * Unrolled K step, assembled only when KB > 173.
             * a0 = the 2 packed doubles of A at pA + 2*i1280 + 80 (movapd:
             * aligned load); that is 2640(pA) if i1280 holds 1280 as its
             * #define comment (5 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 16480..16568(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 80(pA,i1280,2), a0
            movddup 16480(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16488(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16496(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16504(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16512(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16520(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16528(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16536(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16544(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16552(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16560(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16568(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 174
            /*
             * Unrolled K step, assembled only when KB > 174.
             * a0 = the 2 packed doubles of A at pA + 2*i1280 + 96 (movapd:
             * aligned load); that is 2656(pA) if i1280 holds 1280 as its
             * #define comment (5 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 16576..16664(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 96(pA,i1280,2), a0
            movddup 16576(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16584(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16592(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16600(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16608(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16616(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16624(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16632(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16640(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16648(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16656(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16664(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 175
            /*
             * Unrolled K step, assembled only when KB > 175.
             * a0 = the 2 packed doubles of A at pA + 2*i1280 + 112 (movapd:
             * aligned load); that is 2672(pA) if i1280 holds 1280 as its
             * #define comment (5 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 16672..16760(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 112(pA,i1280,2), a0
            movddup 16672(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16680(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16688(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16696(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16704(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16712(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16720(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16728(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16736(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16744(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16752(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16760(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 176
            /*
             * Unrolled K step, assembled only when KB > 176.
             * a0 = the 2 packed doubles of A at 2688(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 16768..16856(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2688(pA), a0
            movddup 16768(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16776(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16784(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16792(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16800(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16808(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16816(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16824(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16832(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16840(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16848(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16856(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 177
            /*
             * Unrolled K step, assembled only when KB > 177.
             * a0 = the 2 packed doubles of A at 2704(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 16864..16952(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2704(pA), a0
            movddup 16864(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16872(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16880(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16888(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16896(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 16904(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 16912(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 16920(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 16928(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 16936(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 16944(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 16952(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 178
            /*
             * Unrolled K step, assembled only when KB > 178.
             * a0 = the 2 packed doubles of A at 2720(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 16960..17048(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2720(pA), a0
            movddup 16960(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 16968(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 16976(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 16984(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 16992(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17000(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17008(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17016(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17024(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17032(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17040(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17048(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 179
            /*
             * Unrolled K step, assembled only when KB > 179.
             * a0 = the 2 packed doubles of A at 2736(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17056..17144(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2736(pA), a0
            movddup 17056(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17064(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17072(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17080(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17088(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17096(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17104(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17112(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17120(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17128(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17136(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17144(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 180
            /*
             * Unrolled K step, assembled only when KB > 180.
             * a0 = the 2 packed doubles of A at 2752(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17152..17240(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2752(pA), a0
            movddup 17152(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17160(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17168(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17176(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17184(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17192(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17200(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17208(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17216(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17224(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17232(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17240(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 181
            /*
             * Unrolled K step, assembled only when KB > 181.
             * a0 = the 2 packed doubles of A at 2768(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17248..17336(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2768(pA), a0
            movddup 17248(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17256(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17264(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17272(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17280(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17288(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17296(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17304(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17312(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17320(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17328(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17336(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 182
            /*
             * Unrolled K step, assembled only when KB > 182.
             * a0 = the 2 packed doubles of A at 2784(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17344..17432(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2784(pA), a0
            movddup 17344(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17352(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17360(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17368(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17376(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17384(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17392(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17400(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17408(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17416(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17424(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17432(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 183
            /*
             * Unrolled K step, assembled only when KB > 183.
             * a0 = the 2 packed doubles of A at 2800(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17440..17528(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2800(pA), a0
            movddup 17440(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17448(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17456(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17464(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17472(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17480(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17488(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17496(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17504(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17512(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17520(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17528(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 184
            /*
             * Unrolled K step, assembled only when KB > 184.
             * a0 = the 2 packed doubles of A at 2816(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17536..17624(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2816(pA), a0
            movddup 17536(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17544(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17552(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17560(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17568(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17576(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17584(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17592(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17600(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17608(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17616(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17624(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 185
            /*
             * Unrolled K step, assembled only when KB > 185.
             * a0 = the 2 packed doubles of A at 2832(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17632..17720(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2832(pA), a0
            movddup 17632(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17640(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17648(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17656(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17664(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17672(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17680(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17688(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17696(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17704(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17712(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17720(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 186
            /*
             * Unrolled K step, assembled only when KB > 186.
             * a0 = the 2 packed doubles of A at 2848(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17728..17816(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2848(pA), a0
            movddup 17728(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17736(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17744(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17752(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17760(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17768(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17776(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17784(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17792(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17800(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17808(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17816(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 187
            /*
             * Unrolled K step, assembled only when KB > 187.
             * a0 = the 2 packed doubles of A at 2864(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17824..17912(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2864(pA), a0
            movddup 17824(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17832(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17840(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17848(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17856(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17864(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17872(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17880(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17888(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17896(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 17904(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 17912(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 188
            /*
             * Unrolled K step, assembled only when KB > 188.
             * a0 = the 2 packed doubles of A at 2880(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 17920..18008(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2880(pA), a0
            movddup 17920(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 17928(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 17936(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 17944(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 17952(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 17960(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 17968(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 17976(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 17984(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 17992(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18000(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18008(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 189
            /*
             * Unrolled K step, assembled only when KB > 189.
             * a0 = the 2 packed doubles of A at 2896(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 18016..18104(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2896(pA), a0
            movddup 18016(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18024(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18032(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18040(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18048(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18056(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18064(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18072(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18080(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18088(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18096(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18104(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 190
            /*
             * Unrolled K step, assembled only when KB > 190.
             * a0 = the 2 packed doubles of A at 2912(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 18112..18200(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2912(pA), a0
            movddup 18112(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18120(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18128(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18136(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18144(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18152(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18160(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18168(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18176(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18184(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18192(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18200(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 191
            /*
             * Unrolled K step, assembled only when KB > 191.
             * a0 = the 2 packed doubles of A at 2928(pA) (movapd: aligned load).
             * For each of the 12 doubles of B at 18208..18296(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 2928(pA), a0
            movddup 18208(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18216(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18224(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18232(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18240(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18248(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18256(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18264(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18272(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18280(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18288(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18296(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 192
            /*
             * Unrolled K step, assembled only when KB > 192.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 128 (movapd:
             * aligned load); that is 2944(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18304..18392(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -128(pA,i768,4), a0
            movddup 18304(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18312(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18320(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18328(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18336(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18344(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18352(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18360(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18368(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18376(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18384(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18392(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 193
            /*
             * Unrolled K step, assembled only when KB > 193.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 112 (movapd:
             * aligned load); that is 2960(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18400..18488(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -112(pA,i768,4), a0
            movddup 18400(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18408(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18416(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18424(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18432(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18440(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18448(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18456(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18464(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18472(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18480(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18488(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 194
            /*
             * Unrolled K step, assembled only when KB > 194.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 96 (movapd:
             * aligned load); that is 2976(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18496..18584(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -96(pA,i768,4), a0
            movddup 18496(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18504(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18512(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18520(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18528(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18536(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18544(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18552(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18560(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18568(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18576(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18584(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 195
            /*
             * Unrolled K step, assembled only when KB > 195.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 80 (movapd:
             * aligned load); that is 2992(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18592..18680(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -80(pA,i768,4), a0
            movddup 18592(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18600(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18608(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18616(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18624(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18632(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18640(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18648(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18656(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18664(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18672(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18680(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 196
            /*
             * Unrolled K step, assembled only when KB > 196.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 64 (movapd:
             * aligned load); that is 3008(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18688..18776(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -64(pA,i768,4), a0
            movddup 18688(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18696(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18704(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18712(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18720(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18728(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18736(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18744(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18752(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18760(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18768(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18776(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 197
            /*
             * Unrolled K step, assembled only when KB > 197.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 48 (movapd:
             * aligned load); that is 3024(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18784..18872(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -48(pA,i768,4), a0
            movddup 18784(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18792(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18800(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18808(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18816(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18824(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18832(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18840(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18848(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18856(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18864(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18872(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 198
            /*
             * Unrolled K step, assembled only when KB > 198.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 32 (movapd:
             * aligned load); that is 3040(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18880..18968(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -32(pA,i768,4), a0
            movddup 18880(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18888(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18896(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 18904(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 18912(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 18920(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 18928(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 18936(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 18944(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 18952(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 18960(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 18968(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 199
            /*
             * Unrolled K step, assembled only when KB > 199.
             * a0 = the 2 packed doubles of A at pA + 4*i768 - 16 (movapd:
             * aligned load); that is 3056(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 18976..19064(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd -16(pA,i768,4), a0
            movddup 18976(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 18984(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 18992(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19000(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19008(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19016(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19024(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19032(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19040(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19048(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19056(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19064(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 200
            /*
             * Unrolled K step, assembled only when KB > 200.
             * a0 = the 2 packed doubles of A at pA + 4*i768 (movapd: aligned
             * load); that is 3072(pA) if i768 holds 768 as its #define
             * comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 19072..19160(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd (pA,i768,4), a0
            movddup 19072(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19080(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19088(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19096(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19104(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19112(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19120(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19128(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19136(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19144(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19152(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19160(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 201
            /*
             * Unrolled K step, assembled only when KB > 201.
             * a0 = the 2 packed doubles of A at pA + 4*i768 + 16 (movapd:
             * aligned load); that is 3088(pA) if i768 holds 768 as its
             * #define comment (3 * 256) suggests -- TODO confirm where it is set.
             * For each of the 12 doubles of B at 19168..19256(pB): movddup
             * copies it into both lanes of b0, mulpd scales it by a0, and
             * addpd accumulates the product into rC0..rC11 respectively.
             */
            movapd 16(pA,i768,4), a0
            movddup 19168(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19176(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19184(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19192(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19200(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19208(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19216(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19224(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19232(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19240(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19248(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19256(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 202
            /*
             * Unrolled step assembled only when KB > 202.
             * a0 = 16-byte movapd load from 32(pA,i768,4) = pA + 32 + 4*i768
             * (pA+3104 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19264..19352(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 32(pA,i768,4), a0
            movddup 19264(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19272(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19280(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19288(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19296(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19304(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19312(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19320(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19328(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19336(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19344(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19352(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 203
            /*
             * Unrolled step assembled only when KB > 203.
             * a0 = 16-byte movapd load from 48(pA,i768,4) = pA + 48 + 4*i768
             * (pA+3120 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19360..19448(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 48(pA,i768,4), a0
            movddup 19360(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19368(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19376(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19384(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19392(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19400(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19408(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19416(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19424(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19432(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19440(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19448(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 204
            /*
             * Unrolled step assembled only when KB > 204.
             * a0 = 16-byte movapd load from 64(pA,i768,4) = pA + 64 + 4*i768
             * (pA+3136 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19456..19544(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 64(pA,i768,4), a0
            movddup 19456(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19464(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19472(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19480(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19488(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19496(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19504(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19512(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19520(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19528(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19536(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19544(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 205
            /*
             * Unrolled step assembled only when KB > 205.
             * a0 = 16-byte movapd load from 80(pA,i768,4) = pA + 80 + 4*i768
             * (pA+3152 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19552..19640(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 80(pA,i768,4), a0
            movddup 19552(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19560(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19568(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19576(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19584(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19592(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19600(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19608(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19616(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19624(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19632(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19640(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 206
            /*
             * Unrolled step assembled only when KB > 206.
             * a0 = 16-byte movapd load from 96(pA,i768,4) = pA + 96 + 4*i768
             * (pA+3168 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19648..19736(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 96(pA,i768,4), a0
            movddup 19648(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19656(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19664(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19672(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19680(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19688(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19696(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19704(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19712(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19720(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19728(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19736(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 207
            /*
             * Unrolled step assembled only when KB > 207.
             * a0 = 16-byte movapd load from 112(pA,i768,4) = pA + 112 + 4*i768
             * (pA+3184 if i768 holds 768 = 3*256, per its #define).
             * Each of the 12 doubles at 19744..19832(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd 112(pA,i768,4), a0
            movddup 19744(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19752(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19760(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19768(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19776(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19784(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19792(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19800(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19808(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19816(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19824(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19832(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 208
            /*
             * Unrolled step assembled only when KB > 208.
             * a0 = 16-byte movapd load from 3200(pA) (plain displacement, no
             * index register). Each of the 12 doubles at 19840..19928(pB) is
             * duplicated into both lanes of b0 (movddup), multiplied by a0,
             * and added into rC0..rC11 in turn.
             * NOTE(review): 3200 = 16*208-128 and 19840 = 96*208-128, which
             * suggests pA and pB carry a 128-byte bias set up outside this
             * view -- confirm against the loop setup code.
             */
            movapd 3200(pA), a0
            movddup 19840(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19848(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19856(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19864(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19872(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19880(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19888(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19896(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 19904(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 19912(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 19920(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 19928(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 209
            /*
             * Unrolled step assembled only when KB > 209.
             * a0 = 16-byte movapd load from 3216(pA). Each of the 12 doubles
             * at 19936..20024(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3216(pA), a0
            movddup 19936(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 19944(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 19952(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 19960(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 19968(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 19976(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 19984(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 19992(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20000(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20008(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20016(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20024(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 210
            /*
             * Unrolled step assembled only when KB > 210.
             * a0 = 16-byte movapd load from 3232(pA). Each of the 12 doubles
             * at 20032..20120(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3232(pA), a0
            movddup 20032(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20040(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20048(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20056(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20064(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20072(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20080(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20088(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20096(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20104(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20112(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20120(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 211
            /*
             * Unrolled step assembled only when KB > 211.
             * a0 = 16-byte movapd load from 3248(pA). Each of the 12 doubles
             * at 20128..20216(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3248(pA), a0
            movddup 20128(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20136(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20144(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20152(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20160(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20168(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20176(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20184(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20192(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20200(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20208(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20216(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 212
            /*
             * Unrolled step assembled only when KB > 212.
             * a0 = 16-byte movapd load from 3264(pA). Each of the 12 doubles
             * at 20224..20312(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3264(pA), a0
            movddup 20224(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20232(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20240(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20248(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20256(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20264(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20272(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20280(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20288(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20296(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20304(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20312(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 213
            /*
             * Unrolled step assembled only when KB > 213.
             * a0 = 16-byte movapd load from 3280(pA). Each of the 12 doubles
             * at 20320..20408(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3280(pA), a0
            movddup 20320(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20328(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20336(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20344(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20352(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20360(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20368(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20376(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20384(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20392(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20400(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20408(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 214
            /*
             * Unrolled step assembled only when KB > 214.
             * a0 = 16-byte movapd load from 3296(pA). Each of the 12 doubles
             * at 20416..20504(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3296(pA), a0
            movddup 20416(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20424(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20432(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20440(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20448(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20456(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20464(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20472(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20480(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20488(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20496(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20504(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 215
            /*
             * Unrolled step assembled only when KB > 215.
             * a0 = 16-byte movapd load from 3312(pA). Each of the 12 doubles
             * at 20512..20600(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3312(pA), a0
            movddup 20512(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20520(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20528(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20536(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20544(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20552(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20560(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20568(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20576(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20584(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20592(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20600(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 216
            /*
             * Unrolled step assembled only when KB > 216.
             * a0 = 16-byte movapd load from 3328(pA). Each of the 12 doubles
             * at 20608..20696(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3328(pA), a0
            movddup 20608(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20616(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20624(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20632(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20640(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20648(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20656(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20664(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20672(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20680(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20688(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20696(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 217
            /*
             * Unrolled step assembled only when KB > 217.
             * a0 = 16-byte movapd load from 3344(pA). Each of the 12 doubles
             * at 20704..20792(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3344(pA), a0
            movddup 20704(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20712(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20720(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20728(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20736(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20744(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20752(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20760(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20768(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20776(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20784(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20792(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 218
            /*
             * Unrolled step assembled only when KB > 218.
             * a0 = 16-byte movapd load from 3360(pA). Each of the 12 doubles
             * at 20800..20888(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3360(pA), a0
            movddup 20800(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20808(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20816(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20824(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20832(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20840(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20848(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20856(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20864(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20872(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20880(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20888(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 219
            /*
             * Unrolled step assembled only when KB > 219.
             * a0 = 16-byte movapd load from 3376(pA). Each of the 12 doubles
             * at 20896..20984(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3376(pA), a0
            movddup 20896(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 20904(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 20912(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 20920(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 20928(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 20936(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 20944(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 20952(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 20960(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 20968(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 20976(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 20984(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 220
            /*
             * Unrolled step assembled only when KB > 220.
             * a0 = 16-byte movapd load from 3392(pA). Each of the 12 doubles
             * at 20992..21080(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3392(pA), a0
            movddup 20992(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21000(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21008(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21016(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21024(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21032(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21040(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21048(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21056(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21064(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21072(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21080(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 221
            /*
             * Unrolled step assembled only when KB > 221.
             * a0 = 16-byte movapd load from 3408(pA). Each of the 12 doubles
             * at 21088..21176(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3408(pA), a0
            movddup 21088(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21096(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21104(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21112(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21120(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21128(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21136(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21144(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21152(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21160(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21168(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21176(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 222
            /*
             * Unrolled step assembled only when KB > 222.
             * a0 = 16-byte movapd load from 3424(pA). Each of the 12 doubles
             * at 21184..21272(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3424(pA), a0
            movddup 21184(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21192(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21200(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21208(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21216(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21224(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21232(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21240(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21248(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21256(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21264(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21272(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 223
            /*
             * Unrolled step assembled only when KB > 223.
             * a0 = 16-byte movapd load from 3440(pA). Each of the 12 doubles
             * at 21280..21368(pB) is duplicated into both lanes of b0
             * (movddup), multiplied by a0, and added into rC0..rC11 in turn.
             */
            movapd 3440(pA), a0
            movddup 21280(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21288(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21296(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21304(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21312(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21320(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21328(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21336(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21344(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21352(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21360(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21368(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 224
            /*
             * Unrolled step assembled only when KB > 224.
             * a0 = 16-byte movapd load from -128(pA,i1792,2) = pA - 128 +
             * 2*i1792 (pA+3456 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21376..21464(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             * NOTE(review): 3456 = 16*224-128 and 21376 = 96*224-128, which
             * suggests pA and pB carry a 128-byte bias set up outside this
             * view -- confirm against the loop setup code.
             */
            movapd -128(pA,i1792,2), a0
            movddup 21376(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21384(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21392(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21400(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21408(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21416(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21424(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21432(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21440(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21448(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21456(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21464(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 225
            /*
             * Unrolled step assembled only when KB > 225.
             * a0 = 16-byte movapd load from -112(pA,i1792,2) = pA - 112 +
             * 2*i1792 (pA+3472 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21472..21560(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -112(pA,i1792,2), a0
            movddup 21472(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21480(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21488(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21496(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21504(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21512(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21520(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21528(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21536(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21544(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21552(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21560(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 226
            /*
             * Unrolled step assembled only when KB > 226.
             * a0 = 16-byte movapd load from -96(pA,i1792,2) = pA - 96 +
             * 2*i1792 (pA+3488 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21568..21656(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -96(pA,i1792,2), a0
            movddup 21568(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21576(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21584(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21592(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21600(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21608(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21616(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21624(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21632(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21640(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21648(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21656(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 227
            /*
             * Unrolled step assembled only when KB > 227.
             * a0 = 16-byte movapd load from -80(pA,i1792,2) = pA - 80 +
             * 2*i1792 (pA+3504 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21664..21752(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -80(pA,i1792,2), a0
            movddup 21664(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21672(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21680(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21688(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21696(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21704(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21712(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21720(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21728(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21736(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21744(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21752(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 228
            /*
             * Unrolled step assembled only when KB > 228.
             * a0 = 16-byte movapd load from -64(pA,i1792,2) = pA - 64 +
             * 2*i1792 (pA+3520 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21760..21848(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -64(pA,i1792,2), a0
            movddup 21760(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21768(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21776(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21784(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21792(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21800(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21808(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21816(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21824(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21832(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21840(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21848(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 229
            /*
             * Unrolled step assembled only when KB > 229.
             * a0 = 16-byte movapd load from -48(pA,i1792,2) = pA - 48 +
             * 2*i1792 (pA+3536 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21856..21944(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -48(pA,i1792,2), a0
            movddup 21856(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21864(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21872(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21880(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21888(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21896(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 21904(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 21912(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 21920(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 21928(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 21936(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 21944(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 230
            /*
             * Unrolled step assembled only when KB > 230.
             * a0 = 16-byte movapd load from -32(pA,i1792,2) = pA - 32 +
             * 2*i1792 (pA+3552 if i1792 holds 1792 = 7*256, per its #define).
             * Each of the 12 doubles at 21952..22040(pB) is duplicated into
             * both lanes of b0 (movddup), multiplied by a0, and added into
             * rC0..rC11 in turn.
             */
            movapd -32(pA,i1792,2), a0
            movddup 21952(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 21960(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 21968(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 21976(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 21984(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 21992(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22000(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22008(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22016(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22024(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22032(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22040(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 231
            movapd -16(pA,i1792,2), a0
            movddup 22048(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22056(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22064(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22072(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22080(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22088(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22096(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22104(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22112(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22120(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22128(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22136(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 232
            movapd (pA,i1792,2), a0
            movddup 22144(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22152(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22160(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22168(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22176(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22184(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22192(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22200(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22208(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22216(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22224(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22232(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 233
            movapd 16(pA,i1792,2), a0
            movddup 22240(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22248(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22256(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22264(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22272(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22280(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22288(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22296(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22304(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22312(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22320(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22328(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 234
            movapd 32(pA,i1792,2), a0
            movddup 22336(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22344(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22352(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22360(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22368(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22376(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22384(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22392(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22400(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22408(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22416(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22424(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 235
            movapd 48(pA,i1792,2), a0
            movddup 22432(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22440(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22448(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22456(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22464(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22472(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22480(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22488(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22496(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22504(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22512(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22520(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 236
            movapd 64(pA,i1792,2), a0
            movddup 22528(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22536(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22544(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22552(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22560(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22568(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22576(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22584(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22592(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22600(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22608(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22616(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 237
            movapd 80(pA,i1792,2), a0
            movddup 22624(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22632(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22640(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22648(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22656(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22664(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22672(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22680(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22688(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22696(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22704(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22712(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 238
            movapd 96(pA,i1792,2), a0
            movddup 22720(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22728(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22736(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22744(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22752(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22760(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22768(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22776(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22784(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22792(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22800(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22808(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 239
            movapd 112(pA,i1792,2), a0
            movddup 22816(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22824(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22832(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22840(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22848(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22856(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22864(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22872(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22880(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22888(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22896(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 22904(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 240
            movapd 3712(pA), a0
            movddup 22912(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 22920(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 22928(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 22936(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 22944(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 22952(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 22960(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 22968(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 22976(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 22984(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 22992(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23000(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 241
            movapd 3728(pA), a0
            movddup 23008(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23016(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23024(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23032(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23040(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23048(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23056(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23064(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23072(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23080(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23088(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23096(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 242
            movapd 3744(pA), a0
            movddup 23104(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23112(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23120(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23128(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23136(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23144(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23152(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23160(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23168(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23176(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23184(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23192(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 243
            movapd 3760(pA), a0
            movddup 23200(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23208(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23216(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23224(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23232(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23240(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23248(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23256(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23264(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23272(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23280(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23288(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 244
            movapd 3776(pA), a0
            movddup 23296(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23304(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23312(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23320(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23328(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23336(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23344(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23352(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23360(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23368(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23376(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23384(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 245
            movapd 3792(pA), a0
            movddup 23392(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23400(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23408(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23416(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23424(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23432(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23440(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23448(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23456(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23464(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23472(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23480(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 246
            movapd 3808(pA), a0
            movddup 23488(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23496(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23504(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23512(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23520(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23528(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23536(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23544(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23552(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23560(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23568(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23576(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 247
            movapd 3824(pA), a0
            movddup 23584(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23592(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23600(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23608(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23616(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23624(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23632(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23640(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23648(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23656(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23664(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23672(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 248
            movapd 3840(pA), a0
            movddup 23680(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23688(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23696(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23704(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23712(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23720(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23728(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23736(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23744(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23752(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23760(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23768(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 249
            movapd 3856(pA), a0
            movddup 23776(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23784(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23792(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23800(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23808(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23816(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23824(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23832(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23840(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23848(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23856(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23864(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 250
            movapd 3872(pA), a0
            movddup 23872(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23880(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23888(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23896(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 23904(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 23912(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 23920(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 23928(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 23936(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 23944(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 23952(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 23960(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 251
            movapd 3888(pA), a0
            movddup 23968(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 23976(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 23984(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 23992(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 24000(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 24008(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 24016(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24024(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 24032(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 24040(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 24048(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24056(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 252
            movapd 3904(pA), a0
            movddup 24064(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 24072(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 24080(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24088(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 24096(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 24104(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 24112(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24120(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 24128(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 24136(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 24144(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24152(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 253
            movapd 3920(pA), a0
            movddup 24160(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 24168(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 24176(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24184(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 24192(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 24200(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 24208(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24216(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 24224(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 24232(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 24240(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24248(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 254
            movapd 3936(pA), a0
            movddup 24256(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 24264(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 24272(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24280(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 24288(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 24296(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 24304(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24312(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 24320(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 24328(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 24336(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24344(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         #if KB > 255
            movapd 3952(pA), a0
            movddup 24352(pB), b0
            mulpd a0, b0
            addpd b0, rC0
            movddup 24360(pB), b0
            mulpd a0, b0
            addpd b0, rC1
            movddup 24368(pB), b0
            mulpd a0, b0
            addpd b0, rC2
            movddup 24376(pB), b0
            mulpd a0, b0
            addpd b0, rC3
            movddup 24384(pB), b0
            mulpd a0, b0
            addpd b0, rC4
            movddup 24392(pB), b0
            mulpd a0, b0
            addpd b0, rC5
            movddup 24400(pB), b0
            mulpd a0, b0
            addpd b0, rC6
            movddup 24408(pB), b0
            mulpd a0, b0
            addpd b0, rC7
            movddup 24416(pB), b0
            mulpd a0, b0
            addpd b0, rC8
            movddup 24424(pB), b0
            mulpd a0, b0
            addpd b0, rC9
            movddup 24432(pB), b0
            mulpd a0, b0
            addpd b0, rC10
            movddup 24440(pB), b0
            mulpd a0, b0
            addpd b0, rC11
         #endif
         add incPF, pfA    /* step pfA by incPF (presumably the A prefetch cursor; its use is outside this view) */
         add incPF, pfB    /* step pfB by the same incPF */
/*
 *       Write answer back out to C.
 *       The 12 accumulators rC0..rC11 correspond, in order, to the 12
 *       consecutive 16-byte vectors at pC-128, pC-112, ..., pC+48.
 *       movapd requires each of those addresses to be 16-byte aligned.
 */
         #ifdef BETA0
/*
 *          BETA0: C is not read; the accumulators simply overwrite it
 */
            movapd rC0, -128(pC)
            movapd rC1, -112(pC)
            movapd rC2, -96(pC)
            movapd rC3, -80(pC)
            movapd rC4, -64(pC)
            movapd rC5, -48(pC)
            movapd rC6, -32(pC)
            movapd rC7, -16(pC)
            movapd rC8, (pC)
            movapd rC9, 16(pC)
            movapd rC10, 32(pC)
            movapd rC11, 48(pC)
         #else
/*
 *          Combine running sum in rCx with original C, then store back out.
 *          BETCOP is addpd (rCx = rCx + C) unless BETAN1 is defined, in which
 *          case it is subpd (rCx = rCx - C).
 */
            BETCOP -128(pC), rC0
            movapd rC0, -128(pC)
            BETCOP -112(pC), rC1
            movapd rC1, -112(pC)
            BETCOP -96(pC), rC2
            movapd rC2, -96(pC)
            BETCOP -80(pC), rC3
            movapd rC3, -80(pC)
            BETCOP -64(pC), rC4
            movapd rC4, -64(pC)
            BETCOP -48(pC), rC5
            movapd rC5, -48(pC)
            BETCOP -32(pC), rC6
            movapd rC6, -32(pC)
            BETCOP -16(pC), rC7
            movapd rC7, -16(pC)
            BETCOP (pC), rC8
            movapd rC8, (pC)
            BETCOP 16(pC), rC9
            movapd rC9, 16(pC)
            BETCOP 32(pC), rC10
            movapd rC10, 32(pC)
            BETCOP 48(pC), rC11
            movapd rC11, 48(pC)
         #endif
/*
 *       Step past the 12 vectors (192 bytes) of C just written and past the
 *       KB*12 doubles of B consumed by the unrolled K steps above.
 */
         add $12*2*8, pC   /* pC += NU*VECLEN*sizeof */
         add $KB*12*8, pB     /* pB += K*NU*sizeof */
/*
 *    Count nnu down; while it is non-zero branch back to MNLOOP (the label is
 *    defined earlier in the file, outside this view).
 */
      sub $1, nnu
      jnz MNLOOP

/*
 *    nnu reached zero: reload nnu and pB from their saved copies (nnu0, pB0)
 *    and advance pA by KB*2*8 bytes.
 */
      mov nnu0, nnu
      mov pB0, pB
      add $KB*2*8, pA    /* pA += KB*MU*size */
/*
 * Count nmu down; this branch targets the same MNLOOP label as the nnu
 * branch above.  Falling through means nmu has reached zero.
 */
   sub $1, nmu
   jnz MNLOOP
/* DONE: */
/*
 * Reload rbp, rbx and r12-r15 from the six 8-byte slots of the FSIZE-byte
 * frame reserved at function entry, release the frame and return.
 */
   movq    (%rsp), %rbp
   movq    8(%rsp), %rbx
   movq    16(%rsp), %r12
   movq    24(%rsp), %r13
   movq    32(%rsp), %r14
   movq    40(%rsp), %r15
   add $FSIZE, %rsp
   ret
