Multiplicação de Matrizes


Os programas abaixo ilustram o uso de threads em linguagem C, para aumentar a velocidade de uma simples multiplicação de matrizes.

Exemplo 1: seqüencial


O código abaixo é usado para comparações. Calcula a multiplicação de matrizes da mesma forma que o exemplo seguinte, porém de modo seqüencial.

/***************************************************************************
 *   Copyright (C) 2007 by Ruben Carlo Benante                             *
 *   beco@bulldog                                                          *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
 
//Thread test...
//Multiplying a matrix C=A*B with 2 threads to make it faster
//By Ruben Carlo Benante (dr.beco at gmail etc)
//GNU/GPL piece of code
//Shared with you. Feel free to use and learn.
 
//To use pthread
//Kdevelop:
//Project > project options > configuration options > link flags (LDFLAGS)
// -lpthread -lm -lutil
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdio.h>   /*printf*/
#include <stdlib.h>  /*exit*/
#include <time.h>    /*clock*/
 
//You can change the size of the matrix here:
//COL = columns; LIN = lines
#define ALIN 1000
#define BCOL 1000
#define ACOLBLIN 1000
#define SLEEPTIME 10 /*seconds*/
 
//Here is to be sure the matrix size is compatible. Do not change.
//ACOL==BLIN; ALIN==CLIN; BCOL==CCOL;
#define ACOL ACOLBLIN
#define BLIN ACOLBLIN
#define CLIN ALIN
#define CCOL BCOL
 
//Global: how to pass parameters to thread?
float A[ALIN][ACOL];
float B[BLIN][BCOL];
float C[CLIN][CCOL];
 
void *fill_a ( void *); //passing A matrix
void *fill_b ( void *); //passing B matrix
void *multiply_first ( void *); //passing all matrix?
void *multiply_second ( void *); //passing all matrix?
 
void print_results(char m, void *V, int linmax, int colmax);
 
int main ( int argc, char *argv[] )
{
    clock_t ini,fim;
    int sec;
    //int l, c, cl, cc, ac, bl;
 
    ini=clock();
    fill_a ( (void *)A );
    fill_b ( (void *)B );
    multiply_first ( NULL );
    multiply_second ( NULL );
    //Print results
    printf ( "With no thread:\n" );
    print_results('A', (void *)A, ALIN, ACOL);
    print_results('B', (void *)B, BLIN, BCOL);
    print_results('C', (void *)C, CLIN, CCOL);
    fim=clock();
    sec= ( fim-ini ) / CLOCKS_PER_SEC;
    printf ( "Elapsed tics: %d\n", ( fim-ini ) );
    printf ( "Elapsed sec: %d\n\n", sec );
 
    return EXIT_SUCCESS;
}
 
void *fill_a ( void *v )
{
    float *a;
    int l,c;
 
    a = (float *)v;
    //fill A
    for ( l=0; l<ALIN; l++ )
        for ( c=0; c<ACOL; c++ )
            a[l*ACOL + c]=l+c;
}
 
void *fill_b ( void *v )
{
    float *b;
    int l,c;
 
    b = (float *)v;
    //fill B
    for ( l=0; l<BLIN; l++ )
        for ( c=0; c<BCOL; c++ )
            b[l*BCOL + c]=l*c;
}
 
void *multiply_first ( void *v)
{
    //float ( *a ) [ACOL], float ( *b ) [BCOL], float ( *c ) [CCOL] )
    int cl, cc, ac, bl;
    //Multiply C - first thread: the first half top lines
    for ( cl=0;cl<int ( ALIN/2 );cl++ )
        for ( cc=0; cc<BCOL; cc++ )
        {
            C[cl][cc]=0.0;
            for ( ac=0, bl=0; ac<ACOL, bl<BLIN; ac++, bl++ )
                C[cl][cc]+= ( A[cl][ac]*B[bl][cc] );
        }
}
 
void *multiply_second ( void *v)
{
    //float ( *a ) [ACOL], float ( *b ) [BCOL], float ( *c ) [CCOL] )
    int cl, cc, ac, bl;
    //Multiply C - second thread: the second half bottom lines
    for ( cl=int ( ALIN/2 );cl<ALIN;cl++ )
        for ( cc=0; cc<BCOL; cc++ )
        {
            C[cl][cc]=0.0;
            for ( ac=0, bl=0; ac<ACOL, bl<BLIN; ac++, bl++ )
                C[cl][cc]+= ( A[cl][ac]*B[bl][cc] );
        }
}
 
void print_results(char m, void *V, int linmax, int colmax)
{
/*    int l, c;
    float *M;
    M=(float *)V;
    printf ("%c:\n", m );
    for ( l=0; l<linmax; l++ )
    {
        for ( c=0; c<colmax; c++ )
            printf ( "%.1f ", M[l*colmax + c] );
        printf ( "\n" );
    }*/
}

Exemplo 2: paralelo


O código abaixo utiliza threads para calcular a multiplicação da matriz em paralelo.


/***************************************************************************
 *   Copyright (C) 2007 by Ruben Carlo Benante                             *
 *   beco@bulldog                                                          *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
 
//Thread test...
//Multiplying a matrix C=A*B with 2 threads to make it faster
//By Ruben Carlo Benante (dr.beco at gmail etc)
//GNU/GPL piece of code
//Shared with you. Feel free to use and learn.
 
//To use pthread
//Kdevelop:
//Project > project options > configuration options > link flags (LDFLAGS)
// -lpthread -lm -lutil
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdio.h>   /*printf*/
#include <stdlib.h>  /*exit*/
#include <time.h>    /*clock*/
#include <pthread.h> /*threads*/
#include <unistd.h>  /*funcao sleep*/
 
//You can change the size of the matrix here:
//COL = columns; LIN = lines
#define ALIN 600
#define BCOL 600
#define ACOLBLIN 600
/*seconds between the first (sequential) and the second (parallel) test*/
#define SLEEPTIME 0
 
 
//Here is to be sure the matrix size is compatible. Do not change.
//ACOL==BLIN; ALIN==CLIN; BCOL==CCOL;
#define ACOL ACOLBLIN
#define BLIN ACOLBLIN
#define CLIN ALIN
#define CCOL BCOL
 
//Global: how to pass parameters to thread?
float A[ALIN][ACOL];
float B[BLIN][BCOL];
float C[CLIN][CCOL];
 
void *fill_a ( void *); //passing A matrix
void *fill_b ( void *); //passing B matrix
void *multiply_first ( void *); //passing all matrix?
void *multiply_second ( void *); //passing all matrix?
 
void print_results(char m, void *V, int linmax, int colmax);
 
int main ( int argc, char *argv[] )
{
    pthread_t thread1, thread2;
    int iret1, iret2;
    clock_t ini, fim;
    time_t tini, tfim;
    float sec, tsec;
    //int l, c, cl, cc, ac, bl;
 
    ini=clock();
    tini=time(NULL);
    printf ( "Starting sequential:\nticks:%d\ntime: %s", ini, asctime(localtime(&tini)));
    fill_a ( (void *)A );
    fill_b ( (void *)B );
    multiply_first ( NULL );
    multiply_second ( NULL );
    //Print results
    print_results('A', (void *)A, ALIN, ACOL);
    print_results('B', (void *)B, BLIN, BCOL);
    print_results('C', (void *)C, CLIN, CCOL);
    tfim = time(NULL);
    fim = clock();
    printf ( "Finishing:\nticks:%d\ntime: %s\n", fim, asctime(localtime(&tfim)));
    sec = (float)( fim-ini ) / (float)CLOCKS_PER_SEC;
    tsec = difftime(tfim, tini);
    printf ( "Elapsed tics: %d\n", ( fim-ini ) );
    printf ( "Elapsed sec with clock_t: %.2f\n", sec );
    printf ( "Elapsed sec with time_t: %.2f\n\n", tsec );
 
    sleep(SLEEPTIME); //pausa para iniciar o proximo teste
    //---------------------------------------------------------------------------
    ini=clock();
    tini=time(NULL);
    printf ( "Starting parallel:\nticks:%d\ntime: %s", ini, asctime(localtime(&tini)));
    //  iret2 = pthread_create( &thread2, NULL, print_message_function, (void*) message2);
    iret1 = pthread_create( &thread1, NULL, fill_a, (void *)A );
    iret1 = pthread_create( &thread2, NULL, fill_b, (void *)B );
    pthread_join ( thread1, NULL );  /* Wait till threads are... */
    pthread_join ( thread2, NULL );  /* ...complete before main continues. */
 
    iret1 = pthread_create( &thread1, NULL, multiply_first, NULL );
    iret1 = pthread_create( &thread2, NULL, multiply_second, NULL );
    pthread_join ( thread1, NULL );
  pthread_join ( thread2, NULL );
 
    //Print results
//    printf ( "Thread 1 returns: %d\n",iret1 );
//    printf ( "Thread 2 returns: %d\n",iret2 );
    print_results('A', (void *)A, ALIN, ACOL);
    print_results('B', (void *)B, BLIN, BCOL);
    print_results('C', (void *)C, CLIN, CCOL);
    tfim = time(NULL);
    fim = clock();
    printf ( "Finishing:\nticks:%d\ntime: %s\n", fim, asctime(localtime(&tfim)));
    sec = (float)( fim-ini ) / (float)CLOCKS_PER_SEC;
    tsec = difftime(tfim, tini);
    printf ( "Elapsed tics: %d\n", ( fim-ini ) );
    printf ( "Elapsed sec with clock_t: %.2f\n", sec );
    printf ( "Elapsed sec with time_t: %.2f\n\n", tsec );
    return EXIT_SUCCESS;
}
 
 
//chamada: fill_a(A)
//cabecalho: void fill_a(float a[][ACOL])
//cabecalho: void fill_a(float (*a)[ACOL])
//dentro: a[l][c]=0;
//cabecalho: void fill_a(float *a)
//chamada:
//dentro: a[l*ACOL + c]
 
void *fill_a ( void *v )
{
    float *a;
    int l,c;
 
    a = (float *)v;
    //fill A
    for ( l=0; l<ALIN; l++ )
        for ( c=0; c<ACOL; c++ )
            a[l*ACOL + c]=l+c;
}
 
void *fill_b ( void *v )
{
    float *b;
    int l,c;
 
    b = (float *)v;
    //fill B
    for ( l=0; l<BLIN; l++ )
        for ( c=0; c<BCOL; c++ )
            b[l*BCOL + c]=l*c;
}
 
void *multiply_first ( void *v)
{
    //float ( *a ) [ACOL], float ( *b ) [BCOL], float ( *c ) [CCOL] )
    int cl, cc, ac, bl;
    //Multiply C - first thread: the first half top lines
    for ( cl=0;cl<int ( ALIN/2 );cl++ )
        for ( cc=0; cc<BCOL; cc++ )
        {
            C[cl][cc]=0.0;
            for ( ac=0, bl=0; ac<ACOL, bl<BLIN; ac++, bl++ )
                C[cl][cc]+= ( A[cl][ac]*B[bl][cc] );
        }
}
 
void *multiply_second ( void *v)
{
    //float ( *a ) [ACOL], float ( *b ) [BCOL], float ( *c ) [CCOL] )
    int cl, cc, ac, bl;
    //Multiply C - second thread: the second half bottom lines
    for ( cl=int ( ALIN/2 );cl<ALIN;cl++ )
        for ( cc=0; cc<BCOL; cc++ )
        {
            C[cl][cc]=0.0;
            for ( ac=0, bl=0; ac<ACOL, bl<BLIN; ac++, bl++ )
                C[cl][cc]+= ( A[cl][ac]*B[bl][cc] );
        }
}
 
void print_results(char m, void *V, int linmax, int colmax)
{
/*    int l, c;
    float *M;
    M=(float *)V;
    printf ("%c:\n", m );
    for ( l=0; l<linmax; l++ )
    {
        for ( c=0; c<colmax; c++ )
            printf ( "%.1f ", M[l*colmax + c] );
        printf ( "\n" );
    }*/
}

Threads usando a CPU


A imagem abaixo mostra o momento em que as threads usam simultaneamente ambos núcleos de uma máquina dual core.

cpu_usage_3test.jpg