/* ---------------------------------------------------------------------
 *
 * -- Automatically Tuned Linear Algebra Software (ATLAS)
 *    (C) Copyright 1999 All Rights Reserved
 *
 * -- ATLAS routine -- Version 2.0 -- December 25, 1999
 *
 * -- Suggestions,  comments,  bugs reports should be sent to the follo-
 *    wing e-mail address: atlas@cs.utk.edu
 *
 *  Author         : Antoine P. Petitet
 * University of Tennessee - Innovative Computing Laboratory
 * Knoxville TN, 37996-1301, USA.
 *
 * ---------------------------------------------------------------------
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1) Redistributions  of  source  code  must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2) Redistributions in binary form must reproduce  the above copyright
 *    notice,  this list of  conditions and the  following disclaimer in
 *    the documentation and/or other materials provided with the distri-
 *    bution.
 * 3) All advertising materials mentioning features or use of this soft-
 *    ware must display the folowing acknowledgement:
 *    This product includes software developed by the ATLAS group of the
 *    University of Tennesee, Knoxville and its contributors.
 * 4) The names of the  University of Tennessee,  Knoxville,  the  ATLAS
 *    group, or the names of its contributors may not be used to endorse
 *    or  promote products derived  from  this software without specific
 *    prior written permission.
 *
 * -- Disclaimer:
 *
 * The University of Tennessee, Knoxville,  the ATLAS group,  or the au-
 * thors make  no representations about the suitability of this software
 * for any purpose.  This software is provided ``as is'' without express
 * or implied warranty.
 *
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "atlas_refmisc.h"
#include "atlas_reflevel3.h"

/*
 * Private kernels used by ATL_sreftrsm.
 *
 * Each kernel handles exactly one SIDE / UPLO / TRANS combination, for a
 * non-zero alpha,  and overwrites the m by n array  b  with the solution.
 * Both arrays are addressed column by column:  entry (i,j) of the trian-
 * gular matrix is  a[i+j*lda]  and  entry (i,j) of the right hand side is
 * b[i+j*ldb].  nounit is non-zero when the diagonal of  a  has to be di-
 * vided out; when it is zero the diagonal entries of  a  are never read.
 */

/*
 * Left side, upper triangular, no transpose: column-wise back substitution.
 */
static void sreftrsmLUN
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj;
   int                        i, j, k, ka;

   for( j = 0; j < n; j++ )
   {
      bj = b + j * ldb;

      for( i = 0; i < m; i++ ) { bj[i] *= alpha; }
/*
 * Walk the unknowns from the last row up; once entry k is final, remove
 * its contribution from every row above it.
 */
      for( k = m - 1; k >= 0; k-- )
      {
         ka = k * lda;
         if( nounit ) { bj[k] /= a[k+ka]; }
         for( i = 0; i < k; i++ ) { bj[i] -= bj[k] * a[i+ka]; }
      }
   }
}

/*
 * Left side, lower triangular, no transpose: column-wise forward
 * substitution.
 */
static void sreftrsmLLN
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj;
   int                        i, j, k, ka;

   for( j = 0; j < n; j++ )
   {
      bj = b + j * ldb;

      for( i = 0; i < m; i++ ) { bj[i] *= alpha; }
/*
 * Walk the unknowns from the first row down; once entry k is final, re-
 * move its contribution from every row below it.
 */
      for( k = 0; k < m; k++ )
      {
         ka = k * lda;
         if( nounit ) { bj[k] /= a[k+ka]; }
         for( i = k + 1; i < m; i++ ) { bj[i] -= bj[k] * a[i+ka]; }
      }
   }
}

/*
 * Left side, upper triangular, transposed:  row i of the transpose is
 * column i of  a,  so each unknown is a dot product over rows 0 .. i-1.
 */
static void sreftrsmLUT
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj;
   float                      t0;
   int                        i, ia, j, k;

   for( j = 0; j < n; j++ )
   {
      bj = b + j * ldb;

      for( i = 0; i < m; i++ )
      {
         ia = i * lda;
         t0 = alpha * bj[i];
         for( k = 0; k < i; k++ ) { t0 -= a[k+ia] * bj[k]; }
         if( nounit ) { t0 /= a[i+ia]; }
         bj[i] = t0;
      }
   }
}

/*
 * Left side, lower triangular, transposed:  each unknown is a dot pro-
 * duct over rows i+1 .. m-1, computed from the last row up.
 */
static void sreftrsmLLT
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj;
   float                      t0;
   int                        i, ia, j, k;

   for( j = 0; j < n; j++ )
   {
      bj = b + j * ldb;

      for( i = m - 1; i >= 0; i-- )
      {
         ia = i * lda;
         t0 = alpha * bj[i];
         for( k = i + 1; k < m; k++ ) { t0 -= a[k+ia] * bj[k]; }
         if( nounit ) { t0 /= a[i+ia]; }
         bj[i] = t0;
      }
   }
}

/*
 * Right side, upper triangular, no transpose:  columns of  b  are solved
 * left to right, column j using the already solved columns 0 .. j-1.
 */
static void sreftrsmRUN
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj, * bk;
   int                        i, j, ja, k;

   for( j = 0; j < n; j++ )
   {
      ja = j * lda;
      bj = b + j * ldb;

      for( i = 0; i < m; i++ ) { bj[i] *= alpha; }

      for( k = 0; k < j; k++ )
      {
         bk = b + k * ldb;
         for( i = 0; i < m; i++ ) { bj[i] -= a[k+ja] * bk[i]; }
      }

      if( nounit )
      {
         for( i = 0; i < m; i++ ) { bj[i] /= a[j+ja]; }
      }
   }
}

/*
 * Right side, lower triangular, no transpose:  columns of  b  are solved
 * right to left, column j using the already solved columns j+1 .. n-1.
 */
static void sreftrsmRLN
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj, * bk;
   int                        i, j, ja, k;

   for( j = n - 1; j >= 0; j-- )
   {
      ja = j * lda;
      bj = b + j * ldb;

      for( i = 0; i < m; i++ ) { bj[i] *= alpha; }

      for( k = j + 1; k < n; k++ )
      {
         bk = b + k * ldb;
         for( i = 0; i < m; i++ ) { bj[i] -= a[k+ja] * bk[i]; }
      }

      if( nounit )
      {
         for( i = 0; i < m; i++ ) { bj[i] /= a[j+ja]; }
      }
   }
}

/*
 * Right side, upper triangular, transposed:  column k of  b  is finished
 * first (last column to first), then eliminated from columns 0 .. k-1.
 * The scaling by alpha is applied to column k only after it has been
 * used for the elimination.
 */
static void sreftrsmRUT
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj, * bk;
   float                      t0;
   int                        i, j, k, ka;

   for( k = n - 1; k >= 0; k-- )
   {
      ka = k * lda;
      bk = b + k * ldb;

      if( nounit )
      {
         for( i = 0; i < m; i++ ) { bk[i] /= a[k+ka]; }
      }

      for( j = 0; j < k; j++ )
      {
         t0 = a[j+ka];
         bj = b + j * ldb;
         for( i = 0; i < m; i++ ) { bj[i] -= t0 * bk[i]; }
      }

      for( i = 0; i < m; i++ ) { bk[i] *= alpha; }
   }
}

/*
 * Right side, lower triangular, transposed:  column k of  b  is finished
 * first (first column to last), then eliminated from columns k+1 .. n-1.
 * The scaling by alpha is applied to column k only after it has been
 * used for the elimination.
 */
static void sreftrsmRLT
(
   const int                  nounit,
   const int                  m,
   const int                  n,
   const float                alpha,
   const float                * a,
   const int                  lda,
   float                      * b,
   const int                  ldb
)
{
   float                      * bj, * bk;
   float                      t0;
   int                        i, j, k, ka;

   for( k = 0; k < n; k++ )
   {
      ka = k * lda;
      bk = b + k * ldb;

      if( nounit )
      {
         for( i = 0; i < m; i++ ) { bk[i] /= a[k+ka]; }
      }

      for( j = k + 1; j < n; j++ )
      {
         t0 = a[j+ka];
         bj = b + j * ldb;
         for( i = 0; i < m; i++ ) { bj[i] -= t0 * bk[i]; }
      }

      for( i = 0; i < m; i++ ) { bk[i] *= alpha; }
   }
}

void ATL_sreftrsm
(
   const enum ATLAS_SIDE      SIDE,
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const enum ATLAS_DIAG      DIAG,
   const int                  M,
   const int                  N,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   float                      * B,
   const int                  LDB
)
{
/*
 * Purpose
 * =======
 *
 * ATL_sreftrsm  solves, for the m by n matrix X, one of the triangular
 * systems
 *
 *    op( A ) * X = alpha * B,   or  X * op( A ) = alpha * B,
 *
 * where  alpha  is a scalar,  B  is an m by n matrix,  A  is an upper or
 * lower triangular matrix with a unit or non-unit diagonal, and
 *
 *    op( A ) = A   or   op( A ) = A'.
 *
 * The solution X is written over B. Both A and B are accessed in column
 * major order: entry (i,j) of A is A[i+j*LDA], entry (i,j) of B is
 * B[i+j*LDB].
 *
 * Arguments
 * =========
 *
 * SIDE    (input)                       const enum ATLAS_SIDE
 *         Selects the side on which op( A ) multiplies X:
 *
 *            SIDE = AtlasLeft    op( A ) * X = alpha * B,
 *
 *            SIDE = AtlasRight   X * op( A ) = alpha * B.
 *
 *         Any value other than AtlasLeft is handled as AtlasRight.
 *
 * UPLO    (input)                       const enum ATLAS_UPLO
 *         Selects which triangle of A holds the matrix:
 *
 *            UPLO = AtlasUpper   A is upper triangular,
 *
 *            UPLO = AtlasLower   A is lower triangular.
 *
 *         Any value other than AtlasUpper is handled as AtlasLower.
 *
 * TRANS   (input)                       const enum ATLAS_TRANS
 *         Selects the form of op( A ):
 *
 *            TRANS = AtlasNoTrans    op( A ) = A,
 *
 *            TRANS = AtlasTrans      op( A ) = A',
 *
 *            TRANS = AtlasConjTrans  op( A ) = A'.
 *
 *         Any value other than AtlasNoTrans is handled as a transpose.
 *
 * DIAG    (input)                       const enum ATLAS_DIAG
 *         Selects how the diagonal of A is treated:
 *
 *            DIAG = AtlasNonUnit    the diagonal entries of A are read
 *                                   and divided out,
 *
 *            DIAG = AtlasUnit       the diagonal entries of A are taken
 *                                   to be one and are not read.
 *
 *         Any value other than AtlasNonUnit is handled as AtlasUnit.
 *
 * M       (input)                       const int
 *         Number of rows of B.  When M is zero the routine returns im-
 *         mediately.
 *
 * N       (input)                       const int
 *         Number of columns of B.  When N is zero the routine returns
 *         immediately.
 *
 * ALPHA   (input)                       const float
 *         The scalar alpha.  When ALPHA is zero, the m by n part of  B
 *         is set to zero and neither A nor the previous content of B is
 *         read.
 *
 * A       (input)                       const float *
 *         The k by k triangular matrix, where k is m when SIDE is
 *         AtlasLeft and n otherwise, stored in an array of at least
 *         LDA * k entries.  Only the triangle selected by UPLO is read;
 *         the opposite strict triangle is never referenced, and neither
 *         is the diagonal when DIAG = AtlasUnit.
 *
 * LDA     (input)                       const int
 *         Leading dimension of A,  expected to be at least MAX( 1, m )
 *         when SIDE = AtlasLeft and MAX( 1, n ) otherwise.  This is not
 *         checked here.
 *
 * B       (input/output)                float *
 *         On entry, the m by n right hand side matrix, stored in an ar-
 *         ray of at least LDB * n entries  (its content is not read when
 *         alpha is zero).  On exit, the m by n solution matrix X.
 *
 * LDB     (input)                       const int
 *         Leading dimension of B, expected to be at least MAX( 1, m ).
 *         This is not checked here.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const int                  nounit = ( DIAG == AtlasNonUnit );
   float                      * bj;
   int                        i, j;
/* ..
 * .. Executable Statements ..
 *
 */
/*
 * Nothing to solve for an empty right hand side.
 */
   if( ( M == 0 ) || ( N == 0 ) ) return;
/*
 * alpha = 0: the solution is the zero matrix, A is not needed.
 */
   if( ALPHA == ATL_sZERO )
   {
      for( j = 0; j < N; j++ )
      {
         bj = B + j * LDB;
         for( i = 0; i < M; i++ ) { bj[i] = ATL_sZERO; }
      }
      return;
   }
/*
 * Hand the problem over to the kernel matching SIDE, TRANS and UPLO.
 */
   if( SIDE == AtlasLeft )
   {
      if( TRANS == AtlasNoTrans )
      {
         if( UPLO == AtlasUpper )
         {
            sreftrsmLUN( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
         else
         {
            sreftrsmLLN( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
      }
      else
      {
         if( UPLO == AtlasUpper )
         {
            sreftrsmLUT( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
         else
         {
            sreftrsmLLT( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
      }
   }
   else
   {
      if( TRANS == AtlasNoTrans )
      {
         if( UPLO == AtlasUpper )
         {
            sreftrsmRUN( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
         else
         {
            sreftrsmRLN( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
      }
      else
      {
         if( UPLO == AtlasUpper )
         {
            sreftrsmRUT( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
         else
         {
            sreftrsmRLT( nounit, M, N, ALPHA, A, LDA, B, LDB );
         }
      }
   }
/*
 * End of ATL_sreftrsm
 */
}
