/* {{{1 GNU General Public License

Program Tops - a stack-based computing environment
Copyright (C) 1999-2005  Dale R. Williamson

Author and copyright holder of spmult.c, spmult_cc.c, spmult_cr.c,
spmult_rc.c, spmult_rr.c:  Al Danial <al.danial@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1}}} */
/* headers {{{1 */
#include <stdio.h>
#include <stdlib.h>  /* random, RAND_MAX */
#include <string.h>  /* strncmp */
#include <math.h>    /* fabs */

#include "main.h"
#include "stk.h"
#include "exe.h"     /* xmain */
#include "mat.h"     /* bend  */
#include "tag.h"
#include "sparse.h"

#include "ctrl.h"
#include "inpo.h"
#include "mem.h"


#define  TRUE      1
#define  FALSE     0
#ifdef BLAS
#ifdef LAPACK
#ifdef FORT_UDSC
#define DAXPY daxpy_
#define DCOPY dcopy_
#define DSCAL dscal_
#define DDOT  ddot_ 
#else
#define DAXPY daxpy
#define DCOPY dcopy
#define DSCAL dscal
#define DDOT  ddot
#endif
#endif
#endif

/* headers 1}}} */

/* sparse x dense  */
int  spmult_sd_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]()     /* {{{1 */
/* 
 *   Multiply a sparse and a dense matrix :    [C] = [A][B]
 */ 
{
int DEBUG = 0;
    int          c, r, s, j, end_row,
                 nRows_B, nCols_B, nRows_C, nCols_C, cmplx_C, n_ptr;
    SparseMatrix A;
    double      *B, *C;
    char   *name = "_spmult_sd_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]";

/*   Slice tag CX means RC, CR, or CC.
[RR:
     [A] is real,    sparse    [B] is real   , dense      [C] is real   , dense
:RR]
[RC:
     [A] is real,    sparse    [B] is complex, dense      [C] is complex, dense
:RC]
[CR:
     [A] is complex, sparse    [B] is real   , dense      [C] is complex, dense
:CR]
[CC:
     [A] is complex, sparse    [B] is complex, dense      [C] is complex, dense
:CC]
*/
if (DEBUG) printf("top of %s\n", name);

    A = sparse_overlay(tos-1);      /* index 1 -> matrix [A] */

    cmplx_C = [RR:0:RR][CX:1:CX];

[RR:
    nRows_B = tos->row;
:RR]
[RC:
    nRows_B = tos->row/2;
:RC]
[CR:
    nRows_B = tos->row;
:CR]
[CC:
    nRows_B = tos->row/2;
:CC]
    nCols_B = tos->col;
    B       = tos->mat;
if (DEBUG)
printf("%s: A cols=%d  B rows=%d\n", name, A.H[COLS], nRows_B);
    if (A.H[COLS] != nRows_B) {
        stkerr(" spmult: ",MATSNOTC); 
        return 0;
    }

    /* declare memory for product [C] and put it on the stack */
    nRows_C = A.H[ROWS][CX:*2:CX];
    nCols_C = nCols_B;
    if (!matstk(nRows_C, nCols_C, name)) return 0;
[CX:
    set_complex(tos);
:CX]
    C       = tos->mat;
    for (j = 0; j < nRows_C*nCols_C; j++) 
        C[j] = 0.0;
if (DEBUG) {
printf("%s A %2d x %2d\n", name, A.H[ROWS], A.H[COLS]);
printf("%s B %2d x %2d\n", name, nRows_B,   nCols_B);
printf("%s C %2d x %2d\n", name, nRows_C,   nCols_C);
}
    n_ptr = 0;
    for (c = 0; c < A.H[COLS]; c++) {
        for (s = A.S_start[c]; s < A.S_start[c+1]; s++) {
            end_row = A.S[s].start_row + A.S[s].len - 1;
            n_ptr   = A.S[s].N_idx;
if (DEBUG) printf("column %d string %d  rows %d:%d (len=%d)  nptr=%d\n",
c, s, A.S[s].start_row, end_row, A.S[s].len, n_ptr);
            for (r = A.S[s].start_row; r <= end_row; r++) {
                for (j = 0; j < nCols_B; j++) {
[RR:
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
r,j, r,c, A.N[n_ptr],  c,j, B[c + j*nRows_B]);
                    C[r + j*nRows_C] += A.N[ n_ptr ] * B[c + j*nRows_B];
:RR]
[RC:
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r,  j, r,c, A.N[n_ptr],  2*c,  j, B[2*(c + j*nRows_B)]);
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r+1,j, r,c, A.N[n_ptr],  2*c+1,j, B[2*(c + j*nRows_B)+1]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr ] * B[2*(c+j*nRows_B)];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr ] * B[2*(c+j*nRows_B)+1];
:RC]
[CR:
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r,  j, r,c, A.N[n_ptr],  c,j, B[c + j*nRows_B]);
if (DEBUG) printf("C[%d,%d] += Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r+1,j, r,c, A.N[n_ptr+1],c,j, B[c + j*nRows_B]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr  ] * B[c + j*nRows_B];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr+1] * B[c + j*nRows_B];
:CR]
[CC:
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le) -\n",
2*r,  j, r,c, A.N[n_ptr],  2*c,j, B[2*(c + j*nRows_B)]);
if (DEBUG) printf("          Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
         r,c, A.N[n_ptr+1],2*c+1,j, B[2*(c + j*nRows_B)+1]);
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le) +\n",
2*r+1,  j, r,c, A.N[n_ptr],  2*c+1,j, B[2*(c + j*nRows_B)+1]);
if (DEBUG) printf("          Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
         r,c, A.N[n_ptr+1],2*c,j, B[2*c   + j*nRows_B]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr  ]* B[2*(c+j*nRows_B)] 
                                        - A.N[ n_ptr+1]* B[2*(c+j*nRows_B)+1];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr  ]* B[2*(c+j*nRows_B)+1] 
                                        + A.N[ n_ptr+1]* B[2*(c+j*nRows_B)];
:CC]
                }
[RR:
                n_ptr++;
:RR]
[RC:
                n_ptr++;
:RC]
[CR:
                n_ptr += 2;
:CR]
[CC:
                n_ptr += 2;
:CC]
            }
        }
    }

    pushq2("_C",2); naming();
    lop(); /* drops B */
    lop(); /* drops A */

    return 1;
}
/* 1}}} */

/* sparse x sparse */
int  spmult_ss_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]()     /* {{{1 */
/* 
 *   Multiply two sparse matrices :    [C] = [A][B]
 */ 
{
int DEBUG = 0;
    int          i, rA = 0, cB, found_term = 0, prev_row = -2,
                 A_offset, B_offset, overlap_len, overlap,
                 A_n_str, A_s_ptr, A_N_ptr,  
                 B_n_str, B_s_ptr, B_N_ptr, 
                 C_N_ptr, nRows_C, nCols_C, cmplx_C, nStr_C, nNZ_C;
    SparseMatrix A, B, C;
    char        *name = "_spmult_ss_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]";

/*   Slice tag CX means RC, CR, or CC.
[RR:
     [A] is real,    sparse    [B] is real   , sparse     [C] is real   , sparse
:RR]
[RC:
     [A] is real,    sparse    [B] is complex, sparse     [C] is complex, sparse
:RC]
[CR:
     [A] is complex, sparse    [B] is real   , sparse     [C] is complex, sparse
:CR]
[CC:
     [A] is complex, sparse    [B] is complex, sparse     [C] is complex, sparse
:CC]
*/
if (DEBUG) printf("top of %s\n", name);

    swap();
    spbend(); /* Transpose [A].  If complex, will need to recover signs on [Ai]. */
    swap();

    A = sparse_overlay(tos-1);      /* index 1 -> matrix [A] */
    /* spbend flipped signs on [A]'s imaginary terms; restore original signs */
    if (A.H[COMPLX])
        for (i = 0; i < A.H[n_NONZ]; i++)
            A.N[2*i + 1] = -A.N[2*i + 1];

    B = sparse_overlay(tos);        /* index 0 -> matrix [B] */
if (DEBUG) {
gprintf("%s A' %2d x %2d\n", name, A.H[ROWS], A.H[COLS]);
gprintf("%s B  %2d x %2d\n", name, B.H[ROWS], B.H[COLS]);
}

    cmplx_C = [RR:0:RR][CX:1:CX];

if (DEBUG) gprintf("%s: A cols=%d  B rows=%d\n", name, A.H[COLS], B.H[ROWS]);
    if (A.H[ROWS] != B.H[ROWS]) {
        stkerr(name, MATSNOTC); 
        return 0;
    }

    nRows_C = A.H[COLS]; /* [CX:*2:CX]; */
    nCols_C = B.H[COLS];
[CX:
    set_complex(tos);
:CX]

if (DEBUG) gprintf("%s C %2d x %2d\n", name, nRows_C,   nCols_C);

    /* pass 1:  determine number of strings and non-zeros in product {{{2 */
    nStr_C = 0;
    nNZ_C  = 0;
    for (cB = 0; cB < B.H[COLS]; cB++) {  /* cB -> columns of [B] */
        B_n_str = B.S_start[cB+1] - B.S_start[cB];

        prev_row = -2;

        for (rA = 0; rA < A.H[COLS]; rA++) { /* rA -> cols of A' == rows of A */
            found_term = 0;
            A_n_str = A.S_start[rA+1] - A.S_start[rA];

            A_s_ptr = 0;
            B_s_ptr = 0;
if (DEBUG) gprintf("testing A row %2d (A_s_ptr=%d)  B col %2d (B_s_ptr=%d)\n", 
rA, A_s_ptr, cB, B_s_ptr);
            if (f_col_overlap(A_n_str,  &A.S[ A.S_start[rA] ], 
                              B_n_str,  &B.S[ B.S_start[cB] ], 0,
                             &A_s_ptr,  &B_s_ptr,
                             &A_offset, &B_offset, &overlap_len)) {
if (DEBUG) gprintf("[C] has term at %2d, %2d\n", rA, cB);
                ++nNZ_C;
                if (rA != (prev_row + 1)) { /* starts new string in [C] col cB */
                    ++nStr_C;
                }
                prev_row = rA;
            }
        }
    }

if (DEBUG) {
gprintf("after symbolic phase, C has %d strings, %d non-zeros\n", nStr_C, nNZ_C);
}
    /* 2}}} */

    /*   Allocate memory for product and work arrays. Put [C] on stack. {{{2 */
    if (!sparse_stk(nRows_C    , /* in  */
                    nCols_C    , /* in  */
                    nStr_C     , /* in  number of strings       */
                    nNZ_C      , /* in  number of nonzero terms */
                    cmplx_C    , /* in  0=real  1=complex       */
                    0          , /* in not internally indexed   */
                    name       , /* in  */
                   &C)) {        /* out */
        return 0;
    }

    for (i = 0; i < [CX:2*:CX]nNZ_C; i++)
        C.N[i] = 0.0;
    /* 2}}} */

    /* pass 2:  compute the product {{{2 */
    nStr_C  = 0;
    nNZ_C   = 0;
    C_N_ptr = 0;
    for (cB = 0; cB < B.H[COLS]; cB++) {  /* cB -> columns of [B] */
        B_n_str       = B.S_start[cB+1] - B.S_start[cB];
        prev_row      = -2;
        C.S_start[cB] = nStr_C;
        C.N_start[cB] = C_N_ptr;

        for (rA = 0; rA < A.H[COLS]; rA++) { /* rA -> cols of A' == rows of A */
            found_term = 0;
            A_n_str = A.S_start[rA+1] - A.S_start[rA];

if (DEBUG) gprintf("testing A row %2d (A_s_ptr=%d)  B col %2d (B_s_ptr=%d)\n", 
rA, A_s_ptr, cB, B_s_ptr);
            A_s_ptr = 0;
            B_s_ptr = 0;
            while ((overlap = f_col_overlap(A_n_str,  &A.S[ A.S_start[rA] ], 
                                            B_n_str,  &B.S[ B.S_start[cB] ], 0,
                                           &A_s_ptr,  &B_s_ptr,
                                           &A_offset, &B_offset, &overlap_len))) {
if (DEBUG) gprintf("[C] has term at %2d, %2d\n", rA, cB);

                found_term = 1;
                A_N_ptr = A.S[ A.S_start[rA] + A_s_ptr ].N_idx 
                          + [CR:2*:CR][CC:2*:CC]A_offset;
                B_N_ptr = B.S[ B.S_start[cB] + B_s_ptr ].N_idx 
                          + [RC:2*:RC][CC:2*:CC]B_offset;
if (DEBUG) gprintf("rA=%d,cB=%d  A_nptr=%d  B_nptr=%d  nTerms=%d\n", 
rA, cB, A_N_ptr, B_N_ptr, overlap_len);
                for (i = 0; i < overlap_len; i++) {
[RR:
if (DEBUG) gprintf("C.N[%d] = % 10.5e += A(% 10.5e) * B(% 10.5e)\n",
C_N_ptr, C.N[C_N_ptr], A.N[A_N_ptr + i], B.N[B_N_ptr + i]);
                    C.N[C_N_ptr] += A.N[A_N_ptr + i] * B.N[B_N_ptr + i];
:RR]

[RC:
                    C.N[C_N_ptr  ] += A.N[A_N_ptr + i] * B.N[B_N_ptr + 2*i];
                    C.N[C_N_ptr+1] += A.N[A_N_ptr + i] * B.N[B_N_ptr + 2*i+1];
:RC]
[CR:
                    C.N[C_N_ptr  ] += A.N[A_N_ptr + 2*i]   * B.N[B_N_ptr + i];
                    C.N[C_N_ptr+1] += A.N[A_N_ptr + 2*i+1] * B.N[B_N_ptr + i];
:CR]
[CC:
if (DEBUG) gprintf("C.Nr[%d] = % 10.5e += Ar(% 10.5e) * Br(% 10.5e) - \n",
C_N_ptr  , C.N[C_N_ptr  ], A.N[A_N_ptr + 2*i  ], B.N[B_N_ptr + 2*i  ]);
if (DEBUG) gprintf("                      Ai(% 10.5e) * Bi(% 10.5e)\n",
                           A.N[A_N_ptr + 2*i+1], B.N[B_N_ptr + 2*i+1]);
if (DEBUG) gprintf("C.Ni[%d] = % 10.5e += Ai(% 10.5e) * Br(% 10.5e) + \n",
C_N_ptr+1, C.N[C_N_ptr+1], A.N[A_N_ptr + 2*i+1], B.N[B_N_ptr + 2*i  ]);
if (DEBUG) gprintf("                      Ar(% 10.5e) * Bi(% 10.5e)\n",
                           A.N[A_N_ptr + 2*i  ], B.N[B_N_ptr + 2*i+1]);
                    C.N[C_N_ptr  ] += A.N[A_N_ptr + 2*i]   * B.N[B_N_ptr + 2*i] 
                                    - A.N[A_N_ptr + 2*i+1] * B.N[B_N_ptr + 2*i+1];
                    C.N[C_N_ptr+1] += A.N[A_N_ptr + 2*i+1] * B.N[B_N_ptr + 2*i] 
                                    + A.N[A_N_ptr + 2*i]   * B.N[B_N_ptr + 2*i+1];
:CC]
                }

                /* advance pointer for the least advanced string */
                switch (overlap) {
                    case 1:
                    case 3:
                        ++A_s_ptr;
                        break;
                    case 2:
                    case 4:
                        ++B_s_ptr;
                        break;
                }

            }
            if (found_term) {  /* [C] has a non-zero term at (rA,cB) */
                if (rA != (prev_row + 1)) { /* starts new string in [C] col cB */
                    C.S[nStr_C].start_row = rA;
                    C.S[nStr_C].N_idx     = [CX:2*:CX]nNZ_C;
                    C.S[nStr_C].len       = 1;
                    ++nStr_C;
                } else {
                    ++C.S[nStr_C - 1].len;
                }
                ++C_N_ptr; [CX: ++C_N_ptr; :CX]
                ++nNZ_C;
                prev_row = rA;
            }
        }
    }

if (DEBUG) {
gprintf("after numeric phase, C has %d strings, %d non-zeros\n", nStr_C, nNZ_C);
}

    /* 2}}} */

    C.S_start[ C.H[COLS] ] = nStr_C; 
    C.N_start[ C.H[COLS] ] = [CX:2*:CX]nNZ_C;

    pushq2("_C",2); naming();
    lop(); /* drops B */
    lop(); /* drops A */

    return 1;
}
/* 1}}} */

/* sparse symmetric using lower triangle x dense  */
int  spmult_sym_low_sd_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]()     /* {{{1 */
/*
 * man entry:  spmult_sym_low_sd_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC] {{{2
 * (hA_sp hB --- hC)  Multiply a symmetric sparse matrix and a dense matrix:    [C] = [A][B] The sparse matrix must have terms only on the diagonal and below; upper triangular terms appear in the computations as the transpose of the lower triangular terms.
 * category: math::matrix::sparse
 * related:  sparse, spmult, spbend, star, tril, triu
 * 2}}}
 */
{
/* 
     In matlab the code would look like this:

        C = zeros(nRows,nRHS);
        for c = 1:nRows   % loop over columns of [A]
            for t = 1:nRHS   % loop over columns of [B]
                % step 1
                C(c:nRows,t) = C(c:nRows,t) + B(c,t)*A(c:nRows,c);
                % step 2
                if (c < nRows)
                    C(c,t) = C(c,t) + A(c+1:nRows,c)' * B(c+1:nRows, t);
                end
            end
        end

 */ 
int DEBUG = 0;
    int          c, r, s, j, end_row,
                 nRows_B, nCols_B, nRows_C, nCols_C, cmplx_C, n_ptr;
    SparseMatrix A;
    double      *B, *C, *dot_prod;
    char   *name = "_spmult_sym_low_sd_[RR:rr:RR][RC:rc:RC][CR:cr:CR][CC:cc:CC]";

/*   Slice tag CX means RC, CR, or CC.
[RR:
     [A] is real,    sparse    [B] is real   , dense      [C] is real   , dense
:RR]
[RC:
     [A] is real,    sparse    [B] is complex, dense      [C] is complex, dense
:RC]
[CR:
     [A] is complex, sparse    [B] is real   , dense      [C] is complex, dense
:CR]
[CC:
     [A] is complex, sparse    [B] is complex, dense      [C] is complex, dense
:CC]
*/
if (DEBUG) printf("top of %s\n", name);

    if (!is_sparse(tos-1)) {
        stkerr(" spmult_sym_low_sd_rr: [A] ",SPARSENOT);
        return 0;
    }
    if (tos->typ != MAT) {
        stkerr(" spmult_sym_low_sd_rr: [B] ", MATNOT);
        return 0;
    }

    A = sparse_overlay(tos-1);      /* index 1 -> matrix [A] */

    cmplx_C = [RR:0:RR][CX:1:CX];

[RR:
    nRows_B = tos->row;
:RR]
[RC:
    stkerr(" spmult_sym_low_sd_rc: ", "RC code not yet implemented" );
    return 0;
    nRows_B = tos->row/2;
:RC]
[CR:
    stkerr(" spmult_sym_low_sd_cr: ", "CR code not yet implemented" );
    return 0;
    nRows_B = tos->row;
:CR]
[CC:
    stkerr(" spmult_sym_low_sd_cc: ", "CC code not yet implemented" );
    return 0;
    nRows_B = tos->row/2;
:CC]
    nCols_B = tos->col;
    B       = tos->mat;
if (DEBUG)
printf("%s: A cols=%d  B rows=%d\n", name, A.H[COLS], nRows_B);
    if (A.H[COLS] != nRows_B) {
        stkerr(" spmult_sym_low_sd_rr: ",MATSNOTC); 
        return 0;
    }

    /* declare memory for an array to hold dot products of columns of [B]
     * with rows of the upper triangle of [A] ( = transposed columns
     * from the lower triangle) */
    if ((dot_prod = (double *) malloc( nCols_B * sizeof(double))) == NULL) {
        stkerr(" spmult_sym_low_sd_rr: (dot_prod) ",MEMNOT);
        return 0;
    }

    /* declare memory for product [C] and put it on the stack */
    nRows_C = A.H[ROWS][CX:*2:CX];
    nCols_C = nCols_B;
    if (!matstk(nRows_C, nCols_C, name)) return 0;
[CX:
    set_complex(tos);
:CX]
    C       = tos->mat;
    for (j = 0; j < nRows_C*nCols_C; j++) 
        C[j] = 0.0;
if (DEBUG) {
printf("%s A %2d x %2d\n", name, A.H[ROWS], A.H[COLS]);
printf("%s B %2d x %2d\n", name, nRows_B,   nCols_B);
printf("%s C %2d x %2d\n", name, nRows_C,   nCols_C);
}
    n_ptr = 0;
    for (c = 0; c < A.H[COLS]; c++) {
        for (j = 0; j < nCols_B; j++)
            dot_prod[j] = 0.0;
        for (s = A.S_start[c]; s < A.S_start[c+1]; s++) {
            if (A.S[s].start_row < c) {
                stkerr(" spmult_sym_low_sd_rr: ","[A] has terms in the upper triangle"); 
                drop(); /* get rid of [C] */
                free(dot_prod);
                return 0;
            }
            end_row = A.S[s].start_row + A.S[s].len - 1;
            n_ptr   = A.S[s].N_idx;
if (DEBUG) printf("column %d string %d  rows %d:%d (len=%d)  nptr=%d\n",
c, s, A.S[s].start_row, end_row, A.S[s].len, n_ptr);
            for (r = A.S[s].start_row; r <= end_row; r++) {
                for (j = 0; j < nCols_B; j++) {
[RR:
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
r,j, r,c, A.N[n_ptr],  c,j, B[c + j*nRows_B]);
                    C[r + j*nRows_C] += A.N[ n_ptr ] * B[c + j*nRows_B];
                    if (r > c) { /* below diagonal; do transposed dot product */
                        dot_prod[j] += A.N[ n_ptr ] * B[r + j*nRows_B];
                    }
:RR]
[RC:
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r,  j, r,c, A.N[n_ptr],  2*c,  j, B[2*(c + j*nRows_B)]);
if (DEBUG) printf("C[%d,%d] += A[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r+1,j, r,c, A.N[n_ptr],  2*c+1,j, B[2*(c + j*nRows_B)+1]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr ] * B[2*(c+j*nRows_B)];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr ] * B[2*(c+j*nRows_B)+1];
:RC]
[CR:
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r,  j, r,c, A.N[n_ptr],  c,j, B[c + j*nRows_B]);
if (DEBUG) printf("C[%d,%d] += Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
2*r+1,j, r,c, A.N[n_ptr+1],c,j, B[c + j*nRows_B]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr  ] * B[c + j*nRows_B];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr+1] * B[c + j*nRows_B];
:CR]
[CC:
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le) -\n",
2*r,  j, r,c, A.N[n_ptr],  2*c,j, B[2*(c + j*nRows_B)]);
if (DEBUG) printf("          Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
         r,c, A.N[n_ptr+1],2*c+1,j, B[2*(c + j*nRows_B)+1]);
if (DEBUG) printf("C[%d,%d] += Ar[%d,%d](% 8.4le) * B[%d,%d](% 8.4le) +\n",
2*r+1,  j, r,c, A.N[n_ptr],  2*c+1,j, B[2*(c + j*nRows_B)+1]);
if (DEBUG) printf("          Ai[%d,%d](% 8.4le) * B[%d,%d](% 8.4le)\n",
         r,c, A.N[n_ptr+1],2*c,j, B[2*c   + j*nRows_B]);
                    C[2*r+j*nRows_C]   += A.N[ n_ptr  ]* B[2*(c+j*nRows_B)] 
                                        - A.N[ n_ptr+1]* B[2*(c+j*nRows_B)+1];
                    C[2*r+j*nRows_C+1] += A.N[ n_ptr  ]* B[2*(c+j*nRows_B)+1] 
                                        + A.N[ n_ptr+1]* B[2*(c+j*nRows_B)];
:CC]
                }
[RR:
                n_ptr++;
:RR]
[RC:
                n_ptr++;
:RC]
[CR:
                n_ptr += 2;
:CR]
[CC:
                n_ptr += 2;
:CC]
            }
        }
        for (j = 0; j < nCols_B; j++) {
[RR:
            C[c + j*nRows_C] += dot_prod[j];
:RR]
[RC:
:RC]
[CR:
:CR]
[CC:
:CC]
        }
    }

    pushq2("_C",2); naming();
    lop(); /* drops B */
    lop(); /* drops A */

    free(dot_prod);

    return 1;
}
/* 1}}} */
