Hi
I've got a Sony vaio with a NVIDIA GeForce 310M with CUDA driver 3.0.1
I've installed Matlab2010b with VC++ Express edition 2010 + SDK and Matlab CUDA 1.1
when I try compile the simple mex file reported below with the command
mex square_cuda.c -IC:\cuda\include -LC:\cuda\lib -lcudart
I get the following error
square_cuda.c
square_cuda.c(4) : error C2054: expected '(' to follow '__global__'
square_cuda.c(4) : error C2085: 'square_elements' : not in formal parameter list
square_cuda.c(4) : error C2143: syntax error : missing ';' before '{'
square_cuda.c(20) : error C2143: syntax error : missing ';' before 'for'
square_cuda.c(23) : warning C4267: '=' : conversion from 'size_t' to 'int', possible loss of data
square_cuda.c(24) : warning C4267: '=' : conversion from 'size_t' to 'int', possible loss of data
square_cuda.c(28) : warning C4013: 'cudaMalloc' undefined; assuming extern returning int
square_cuda.c(38) : warning C4013: 'cudaMemcpy' undefined; assuming extern returning int
square_cuda.c(39) : error C2065: 'cudaMemcpyHostToDevice' : undeclared identifier
square_cuda.c(50) : error C2065: 'cudaMemcpyHostToDevice' : undeclared identifier
square_cuda.c(54) : error C2065: 'dim3' : undeclared identifier
square_cuda.c(54) : error C2146: syntax error : missing ';' before identifier 'dimBlock'
square_cuda.c(54) : warning C4013: 'dimBlock' undefined; assuming extern returning int
square_cuda.c(55) : error C2065: 'dim3' : undeclared identifier
square_cuda.c(55) : error C2146: syntax error : missing ';' before identifier 'dimGrid'
square_cuda.c(55) : warning C4013: 'dimGrid' undefined; assuming extern returning int
square_cuda.c(55) : error C2224: left of '.x' must have struct/union type
square_cuda.c(56) : error C2224: left of '.x' must have struct/union type
square_cuda.c(58) : error C2065: 'square_elements' : undeclared identifier
square_cuda.c(58) : error C2059: syntax error : '<'
square_cuda.c(62) : error C2065: 'cudaMemcpyDeviceToHos' : undeclared identifier
square_cuda.c(62) : error C2146: syntax error : missing ')' before identifier 'data2'
square_cuda.c(71) : warning C4013: 'cudaFree' undefined; assuming extern returning int
C:\PROGRA~1\MATLAB\R2010B\BIN\MEX.PL: Error: Compile of 'square_cuda.c' failed.
Could someone help me ?
Thanks
The code is
#include "cuda.h"
#include "mex.h"
/* Kernel to square elements of the array on the GPU */
__global__ void square_elements(float* in, float* out, int N){
int idx = blockIdx.x*blockDim.x+threadIdx.x;
if ( idx < N) out[idx]=in[idx]*in[idx];
}
/* Gateway function */
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[]) {
int i, j, m, n;
double *data1, *data2;
float *data1f, *data2f;
float *data1f_gpu, *data2f_gpu;
mxClassID category;
if (nrhs != nlhs)
mexErrMsgTxt("The number of input and output arguments must be the same.")
for (i = 0; i < nrhs; i++) {
/* Find the dimensions of the data */
m = mxGetM(prhs[i]);
n = mxGetN(prhs[i]);
/* Create an mxArray for the output data */
plhs[i] = mxCreateDoubleMatrix(m, n, mxREAL);
/* Create an input and output data array on the GPU*/
cudaMalloc( (void **) &data1f_gpu, sizeof(float)*m*n);
cudaMalloc( (void **) &data2f_gpu, sizeof(float)*m*n);
/* Retrieve the input data */
data1 = mxGetPr(prhs[i]);
/* Check if the input array is single or double precision */
category = mxGetClassID(prhs[i]);
if( category == mxSINGLE_CLASS) {
/* The input array is single precision, it can be sent directly to the
* card */
cudaMemcpy( data1f_gpu, data1, sizeof(float)*m*n,
cudaMemcpyHostToDevice);
}
if( category == mxDOUBLE_CLASS) {
/* The input array is in double precision, it needs to be converted t
* floats before being sent to the card */
data1f = (float *) mxMalloc(sizeof(float)*m*n);
for (j = 0; j < m*n; j++) {
data1f[j] = (float) data1[j];
}
cudaMemcpy( data1f_gpu, data1f, sizeof(float)*n*m, cudaMemcpyHostToDevice);
}
data2f = (float *) mxMalloc(sizeof(float)*m*n);
/* Compute execution configuration using 128 threads per block */
dim3 dimBlock(128);
dim3 dimGrid((m*n)/dimBlock.x);
if ( (n*m) % 128 !=0 ) dimGrid.x+=1;
/* Call function on GPU */
square_elements<<<dimGrid, dimBlock>>>(data1f_gpu, data2f_gpu, n*m);
/* Copy result back to host */
cudaMemcpy( data2f, data2f_gpu, sizeof(float)*n*m, cudaMemcpyDeviceToHos
/* Create a pointer to the output data */
data2 = mxGetPr(plhs[i]);
/* Convert from single to double before returning */
for (j = 0; j < m*n; j++) {
data2[j] = (double) data2f[j];
}
/* Clean-up memory on device and host */
mxFree(data1f);
mxFree(data2f);
cudaFree(data1f_gpu);
cudaFree(data2f_gpu);
}
}
|