CUDA and Matlab2010b

  • Follow


Hi 

I've got a Sony vaio with a NVIDIA GeForce 310M with CUDA driver 3.0.1

I've installed Matlab2010b with VC++ Express edition 2010 + SDK and Matlab CUDA 1.1

when I try compile the simple mex file reported below with the command 

mex square_cuda.c -IC:\cuda\include -LC:\cuda\lib -lcudart

I get the following error

square_cuda.c 
square_cuda.c(4) : error C2054: expected '(' to follow '__global__' 
square_cuda.c(4) : error C2085: 'square_elements' : not in formal parameter list 
square_cuda.c(4) : error C2143: syntax error : missing ';' before '{' 
square_cuda.c(20) : error C2143: syntax error : missing ';' before 'for' 
square_cuda.c(23) : warning C4267: '=' : conversion from 'size_t' to 'int', possible loss of data 
square_cuda.c(24) : warning C4267: '=' : conversion from 'size_t' to 'int', possible loss of data 
square_cuda.c(28) : warning C4013: 'cudaMalloc' undefined; assuming extern returning int 
square_cuda.c(38) : warning C4013: 'cudaMemcpy' undefined; assuming extern returning int 
square_cuda.c(39) : error C2065: 'cudaMemcpyHostToDevice' : undeclared identifier 
square_cuda.c(50) : error C2065: 'cudaMemcpyHostToDevice' : undeclared identifier 
square_cuda.c(54) : error C2065: 'dim3' : undeclared identifier 
square_cuda.c(54) : error C2146: syntax error : missing ';' before identifier 'dimBlock' 
square_cuda.c(54) : warning C4013: 'dimBlock' undefined; assuming extern returning int 
square_cuda.c(55) : error C2065: 'dim3' : undeclared identifier 
square_cuda.c(55) : error C2146: syntax error : missing ';' before identifier 'dimGrid' 
square_cuda.c(55) : warning C4013: 'dimGrid' undefined; assuming extern returning int 
square_cuda.c(55) : error C2224: left of '.x' must have struct/union type 
square_cuda.c(56) : error C2224: left of '.x' must have struct/union type 
square_cuda.c(58) : error C2065: 'square_elements' : undeclared identifier 
square_cuda.c(58) : error C2059: syntax error : '<' 
square_cuda.c(62) : error C2065: 'cudaMemcpyDeviceToHos' : undeclared identifier 
square_cuda.c(62) : error C2146: syntax error : missing ')' before identifier 'data2' 
square_cuda.c(71) : warning C4013: 'cudaFree' undefined; assuming extern returning int 
 
  C:\PROGRA~1\MATLAB\R2010B\BIN\MEX.PL: Error: Compile of 'square_cuda.c' failed. 


Could someone help me ?

Thanks



The code is 



#include "cuda.h"
#include "mex.h"
/* Kernel to square elements of the array on the GPU */
__global__ void square_elements(float* in, float* out, int N){
    int idx = blockIdx.x*blockDim.x+threadIdx.x;
    if ( idx < N) out[idx]=in[idx]*in[idx];
}

/* Gateway function */
void mexFunction(int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[]) {
    int i, j, m, n;
    double *data1, *data2;
    float *data1f, *data2f;
    float *data1f_gpu, *data2f_gpu;
    mxClassID category;
    if (nrhs != nlhs)
        mexErrMsgTxt("The number of input and output arguments must be the same.")
        for (i = 0; i < nrhs; i++) {
            /* Find the dimensions of the data */
            m = mxGetM(prhs[i]);
            n = mxGetN(prhs[i]);
            /* Create an mxArray for the output data */
            plhs[i] = mxCreateDoubleMatrix(m, n, mxREAL);
            /* Create an input and output data array on the GPU*/
            cudaMalloc( (void **) &data1f_gpu, sizeof(float)*m*n);
            cudaMalloc( (void **) &data2f_gpu, sizeof(float)*m*n);
            /* Retrieve the input data */
            data1 = mxGetPr(prhs[i]);
            /* Check if the input array is single or double precision */
            category = mxGetClassID(prhs[i]);
            if( category == mxSINGLE_CLASS) {
                /* The input array is single precision, it can be sent directly to the
                 * card */
                cudaMemcpy( data1f_gpu, data1, sizeof(float)*m*n,
                        cudaMemcpyHostToDevice);
            }
            if( category == mxDOUBLE_CLASS) {
                /* The input array is in double precision, it needs to be converted t
                 * floats before being sent to the card */
                data1f = (float *) mxMalloc(sizeof(float)*m*n);
                for (j = 0; j < m*n; j++) {
                    data1f[j] = (float) data1[j];
                }
                cudaMemcpy( data1f_gpu, data1f, sizeof(float)*n*m, cudaMemcpyHostToDevice);
            }
            data2f = (float *) mxMalloc(sizeof(float)*m*n);
            /* Compute execution configuration using 128 threads per block */
            dim3 dimBlock(128);
            dim3 dimGrid((m*n)/dimBlock.x);
            if ( (n*m) % 128 !=0 ) dimGrid.x+=1;
            /* Call function on GPU */
            square_elements<<<dimGrid, dimBlock>>>(data1f_gpu, data2f_gpu, n*m);
            /* Copy result back to host */
            cudaMemcpy( data2f, data2f_gpu, sizeof(float)*n*m, cudaMemcpyDeviceToHos
                    /* Create a pointer to the output data */
                    data2 = mxGetPr(plhs[i]);
            /* Convert from single to double before returning */
            for (j = 0; j < m*n; j++) {
                data2[j] = (double) data2f[j];
            }
            /* Clean-up memory on device and host */
            mxFree(data1f);
            mxFree(data2f);
            cudaFree(data1f_gpu);
            cudaFree(data2f_gpu);
        }
}
0
Reply Gianmaria 11/29/2010 10:48:06 PM

Hi Gianmaria,

Have you considered using the Jacket SDK for this work?  There are a lot of reasons why this might be a better option for you for both performance and ease of use reasons, see http://blog.accelereyes.com/blog/2010/10/29/jacket_sdk_trumps_mex/

Just wanted to point that out in case you hadn't seen it.

Best,

John
0
Reply John 11/30/2010 2:55:07 AM


1 Replies
531 Views

(page loaded in 0.032 seconds)

Similiar Articles:













7/25/2012 9:52:39 PM


Reply: