#include "atlas_misc.h"
#include "atlas_amm.h"
#include Mstr(Mjoin(AMM_PRE,_sum.h))

/*
 * Accumulates the product of a row-panel of A and a column-panel of B into
 * the workspace rC by sweeping over K one block at a time, then (optionally)
 * hands rC to blk2c to be written into C.
 *
 * One K-block is peeled and run with a kernel taken from the "_b0" family
 * (mminf->amm_b0 or mminf->amm_k1_b0); every remaining block is run with
 * mminf->amm_b1.  When kb0 != kb the peeled block is read from offset
 * inc{A,B}k*nfkblks in A/B (i.e. past the nfkblks full blocks); otherwise it
 * is read from the start of A/B and the source pointers are advanced.
 *
 * NOTE: iC is accepted but never referenced in this routine.
 */
void Mjoin(PATL,ammmK)
(
   amminfo_t *mminf,       /* supplies a2blk/b2blk, kernels, ku, kbmin, flag */
   const int mb,           /* # of rows of C to compute */
   const int nmu,          /* CEIL(mb/mu) */
   const int nb,           /* column count handed to b2blk and blk2c */
   const int nnu,          /* passed to kernel alongside nmu */
   ATL_CINT nfkblks,       /* FLOOR(K/kb); # of kb-sized blocks in main loop */
   const int kb,           /* K size used for every block in the main loop */
   const int kb0,          /* K size given to the copy routines for peel */
   const int KB0,          /* K size given to the kernel for the peel */
   const TYPE *A,
   const size_t lda,
   const size_t incAk,     /* 0: no need to copy A, else incK for copying A */
   const TYPE *B,
   const size_t ldb,
   const size_t incBk,     /* 0: no need to copy B, else incK for copying B */
   const ablk2cmat_t blk2c,/* if non-NULL, called last to write rC into C */
   TYPE *C,
   const size_t ldc,
   TYPE *a,                /* workspace for (or holding) copied A blocks */
   ATL_CINT inca,          /* size of blocks of A, or 0 to reuse space */
   TYPE *b,                /* workspace for (or holding) copied B blocks */
   ATL_CINT incb,          /* size of blocks of B, or 0 to reuse space */
   TYPE *rC,               /* kernel's C workspace */
   TYPE *iC,               /* unused here */
   const SCALAR alpA,      /* scalar forwarded to a2blk */
   const SCALAR alpB,      /* scalar forwarded to b2blk */
   const SCALAR alpC,      /* scalar forwarded to blk2c */
   const SCALAR beta       /* scalar forwarded to blk2c */
)
{
   cm2am_t cpyA=mminf->a2blk, cpyB=mminf->b2blk;
   ammkern_t kern=mminf->amm_b0;
   TYPE *aNxt=a+inca, *bNxt=b+incb;
   const int partial = (kb0 != kb);
   ATL_INT nleft;
/*
 * Choose kernel for the peeled block: for a partial block, the normal b0
 * kernel is kept only if every condition below holds, else use amm_k1_b0
 */
   if (partial)
   {
      ATL_CINT ku=mminf->ku;
      const int keepB0 = ATL_AMMFLG_KRUNTIME(mminf->flag) &&
                         (mminf->kbmin <= KB0) &&
                         (ATL_AMMFLG_KMAJOR(mminf->flag) || (KB0 == kb0)) &&
                         (kb0 % ku == 0);
      if (!keepB0)
         kern = mminf->amm_k1_b0;
   }
/*
 * Copy operands for peeled block if requested.  A partial block lives after
 * the nfkblks full blocks and leaves A/B untouched; a full-sized peel is the
 * leading block, so the source pointer moves past it.
 */
   if (incAk)
   {
      const TYPE *srcA = partial ? A+incAk*nfkblks : A;
      cpyA(kb0, mb, alpA, srcA, lda, a);
      if (!partial)
         A += incAk;
   }
   if (incBk)
   {
      const TYPE *srcB = partial ? B+incBk*nfkblks : B;
      cpyB(kb0, nb, alpB, srcB, ldb, b);
      if (!partial)
         B += incBk;
   }
   kern(nmu, nnu, KB0, a, b, rC, aNxt, bNxt, rC);
/*
 * Remaining full blocks all use the b1 kernel on the same rC
 */
   kern = mminf->amm_b1;
   for (nleft=nfkblks; nleft > 0; nleft--)
   {
      a = aNxt;   /* step to next block's workspace (same one if inc is 0) */
      b = bNxt;
      if (incAk)
      {
         cpyA(kb, mb, alpA, A, lda, a);
         A += incAk;
      }
      if (incBk)
      {
         cpyB(kb, nb, alpB, B, ldb, b);
         B += incBk;
      }
      aNxt = a + inca;
      bNxt = b + incb;
      kern(nmu, nnu, kb, a, b, rC, aNxt, bNxt, rC);
   }
/*
 * Write result out to C only if caller supplied a routine to do so
 */
   if (blk2c)
      blk2c(mb, nb, alpC, rC, beta, C, ldc);
}
