Logo Search packages:      
Sourcecode: blender version File versions

smyblas2.c


/*
 * -- SuperLU routine (version 2.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * November 15, 1997
 *
 */
/*
 * File name:           smyblas2.c
 * Purpose:
 *     Level 2 BLAS operations: solves and matvec, written in C.
 * Note:
 *     This is only used when the system lacks an efficient BLAS library.
 */

/*
 * Solves a dense UNIT lower triangular system. The unit lower 
 * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
 * The solution will be returned in the rhs vector.
 */
void slsolve ( int ldm, int ncol, float *M, float *rhs )
{
    int k;
    float x0, x1, x2, x3, x4, x5, x6, x7;
    float *M0;
    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
    register int firstcol = 0;

    M0 = &M[0];

    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
      Mki0 = M0 + 1;
      Mki1 = Mki0 + ldm + 1;
      Mki2 = Mki1 + ldm + 1;
      Mki3 = Mki2 + ldm + 1;
      Mki4 = Mki3 + ldm + 1;
      Mki5 = Mki4 + ldm + 1;
      Mki6 = Mki5 + ldm + 1;
      Mki7 = Mki6 + ldm + 1;

      x0 = rhs[firstcol];
      x1 = rhs[firstcol+1] - x0 * *Mki0++;
      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
                         - x3 * *Mki3++;
      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
                         - x3 * *Mki3++ - x4 * *Mki4++;
      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
                         - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
                         - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
                     - x6 * *Mki6++;

      rhs[++firstcol] = x1;
      rhs[++firstcol] = x2;
      rhs[++firstcol] = x3;
      rhs[++firstcol] = x4;
      rhs[++firstcol] = x5;
      rhs[++firstcol] = x6;
      rhs[++firstcol] = x7;
      ++firstcol;
    
      for (k = firstcol; k < ncol; k++)
      rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
                      - x2 * *Mki2++ - x3 * *Mki3++
                        - x4 * *Mki4++ - x5 * *Mki5++
                  - x6 * *Mki6++ - x7 * *Mki7++;
 
      M0 += 8 * ldm + 8;
    }

    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
      Mki0 = M0 + 1;
      Mki1 = Mki0 + ldm + 1;
      Mki2 = Mki1 + ldm + 1;
      Mki3 = Mki2 + ldm + 1;

      x0 = rhs[firstcol];
      x1 = rhs[firstcol+1] - x0 * *Mki0++;
      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;

      rhs[++firstcol] = x1;
      rhs[++firstcol] = x2;
      rhs[++firstcol] = x3;
      ++firstcol;
    
      for (k = firstcol; k < ncol; k++)
      rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
                      - x2 * *Mki2++ - x3 * *Mki3++;
 
      M0 += 4 * ldm + 4;
    }

    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
      Mki0 = M0 + 1;
      Mki1 = Mki0 + ldm + 1;

      x0 = rhs[firstcol];
      x1 = rhs[firstcol+1] - x0 * *Mki0++;

      rhs[++firstcol] = x1;
      ++firstcol;
    
      for (k = firstcol; k < ncol; k++)
      rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
 
    }
    
}

/*
 * Solves a dense upper triangular system. The upper triangular matrix is
 * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
 * in the rhs vector.
 */
void
susolve ( ldm, ncol, M, rhs )
int ldm;    /* in */
int ncol;   /* in */
float *M;   /* in */
float *rhs; /* modified */
{
    float xj;
    int jcol, j, irow;

    jcol = ncol - 1;

    for (j = 0; j < ncol; j++) {

      xj = rhs[jcol] / M[jcol + jcol*ldm];            /* M(jcol, jcol) */
      rhs[jcol] = xj;
      
      for (irow = 0; irow < jcol; irow++)
          rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */

      jcol--;

    }
}


/*
 * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
 * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
 */
void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )

int ldm;    /* in -- leading dimension of M */
int nrow;   /* in */ 
int ncol;   /* in */
float *M;   /* in */
float *vec; /* in */
float *Mxvec;     /* in/out */

{
    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
    float *M0;
    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
    register int firstcol = 0;
    int k;

    M0 = &M[0];
    while ( firstcol < ncol - 7 ) { /* Do 8 columns */

      Mki0 = M0;
      Mki1 = Mki0 + ldm;
        Mki2 = Mki1 + ldm;
        Mki3 = Mki2 + ldm;
      Mki4 = Mki3 + ldm;
      Mki5 = Mki4 + ldm;
      Mki6 = Mki5 + ldm;
      Mki7 = Mki6 + ldm;

      vi0 = vec[firstcol++];
      vi1 = vec[firstcol++];
      vi2 = vec[firstcol++];
      vi3 = vec[firstcol++];  
      vi4 = vec[firstcol++];
      vi5 = vec[firstcol++];
      vi6 = vec[firstcol++];
      vi7 = vec[firstcol++];  

      for (k = 0; k < nrow; k++) 
          Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
                  + vi2 * *Mki2++ + vi3 * *Mki3++ 
                  + vi4 * *Mki4++ + vi5 * *Mki5++
                  + vi6 * *Mki6++ + vi7 * *Mki7++;

      M0 += 8 * ldm;
    }

    while ( firstcol < ncol - 3 ) { /* Do 4 columns */

      Mki0 = M0;
      Mki1 = Mki0 + ldm;
      Mki2 = Mki1 + ldm;
      Mki3 = Mki2 + ldm;

      vi0 = vec[firstcol++];
      vi1 = vec[firstcol++];
      vi2 = vec[firstcol++];
      vi3 = vec[firstcol++];  
      for (k = 0; k < nrow; k++) 
          Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
                  + vi2 * *Mki2++ + vi3 * *Mki3++ ;

      M0 += 4 * ldm;
    }

    while ( firstcol < ncol ) {           /* Do 1 column */

      Mki0 = M0;
      vi0 = vec[firstcol++];
      for (k = 0; k < nrow; k++)
          Mxvec[k] += vi0 * *Mki0++;

      M0 += ldm;
    }
      
}


Generated by  Doxygen 1.6.0   Back to index