Matlab integration

JeanLucPons · JeanLucPons · commit ceeb86b81013 · 2024-02-01T12:24:58.000+01:00
diff --git a/atgpu/AbstractInterface.cpp b/atgpu/AbstractInterface.cpp
@@ -10,6 +10,10 @@ void AbstractInterface::setHandler(AbstractInterface *obj) {
   handler = obj;
 }
 
+bool AbstractInterface::isValidHandler() {
+  return !(handler == nullptr);  // true when a handler has been registered
+}
+
 AbstractInterface *AbstractInterface::getInstance() {
   if( handler== nullptr )
     throw string("AbstractInterface: handler not set");
diff --git a/atgpu/AbstractInterface.h b/atgpu/AbstractInterface.h
@@ -28,6 +28,7 @@ class AbstractInterface {
   // Return handle to singleton class
   static AbstractInterface *getInstance();
   static void setHandler(AbstractInterface *obj);
+  static bool isValidHandler();
 
   // Get shape as string
   static std::string getShapeStr(std::vector<int64_t>& shape);
diff --git a/atgpu/Lattice.cpp b/atgpu/Lattice.cpp
@@ -296,7 +296,8 @@ void Lattice::fillGPUMemory() {
 
 void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,
                   uint32_t *refPts,uint32_t nbElemOffset,uint32_t *elemOffsets,
-                  uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord) {
+                  uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord,
+                  bool updateRin) {
 
 #ifdef _PROFILE
   double t0 = AbstractGPU::get_ticks();
@@ -382,7 +383,7 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r
 
   // Get back data
   gpu->deviceToHost(lost,gpuLost,lostSize);
-  gpu->deviceToHost(rin, gpuRin, nbParticles * 6 * sizeof(AT_FLOAT));
+  if(updateRin) gpu->deviceToHost(rin, gpuRin, nbParticles * 6 * sizeof(AT_FLOAT));
   if( routSize ) gpu->deviceToHost(rout,gpuRout,routSize);
   if( lostAtElem ) gpu->deviceToHost(lostAtElem,gpuLostAtElem, nbParticles * sizeof(uint32_t));
   if( lostAtCoord ) gpu->deviceToHost(lostAtCoord,gpuLostAtCoord, nbParticles * 6 * sizeof(AT_FLOAT));
diff --git a/atgpu/Lattice.h b/atgpu/Lattice.h
@@ -27,7 +27,8 @@ class Lattice {
   void generateGPUKernel();
   // Run the simulation
   void run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,uint32_t *refPts,
-           uint32_t nbElemOffset,uint32_t *elemOffsets,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord);
+           uint32_t nbElemOffset,uint32_t *elemOffsets,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord,
+           bool updateRin);
   // Return handle to the GPU context
   GPUContext *getGPUContext();
   // Get ring length
diff --git a/atgpu/MatlabInterface.cpp b/atgpu/MatlabInterface.cpp
@@ -0,0 +1,82 @@
+#include "MatlabInterface.h"
+#include "AbstractGPU.h"
+
+using namespace std;
+
+void MatlabInterface::setObject(mxArray *obj) {
+  this->elem = obj;  // object the get*() accessors read their fields from
+}
+
+// Fetch field `name` from element 0 of the struct array pm via mxGetField.
+// A name with a leading '_' is looked up with the underscore moved to the
+// end instead ("_x" is read from field "x_").
+mxArray *MatlabInterface::getField(const mxArray *pm, const std::string& name) {
+
+  const bool leadingUnderscore = (name[0] == '_');
+  if (!leadingUnderscore)
+    return mxGetField(pm,0,name.c_str());
+
+  // replace leading '_' by trailing '_'
+  const string matlabKey = name.substr(1) + '_';
+  return mxGetField(pm,0,matlabKey.c_str());
+}
+
+// Read attribute `name` of the current element, converted from double to int.
+// Throws a std::string message when the attribute is absent.
+int MatlabInterface::getInt(const std::string& name) {
+  const mxArray *attr = getField(elem,name);
+  if (attr == nullptr)
+    throw string("The required attribute " + name + " is missing.");
+  return static_cast<int>(mxGetScalar(attr));
+}
+
+// Return attribute `name` of the current element as a std::string. Throws a std::string
+// message if the attribute is missing or if mxArrayToString fails (returns NULL).
+std::string MatlabInterface::getString(const std::string& name) {
+  mxArray *field=getField(elem,name);
+  if (!field)
+    throw string("The required attribute " + name + " is missing.");
+  char *valueStr = mxArrayToString(field);
+  if (!valueStr)  // building a std::string from a null pointer is undefined behaviour
+    throw string("The attribute " + name + " cannot be converted to a string.");
+  string ret(valueStr);
+  mxFree(valueStr);  // release the buffer returned by mxArrayToString
+  return ret; }
+
+// Read attribute `name` of the current element as a double scalar.
+// Throws a std::string message when the attribute is absent.
+double MatlabInterface::getDouble(const std::string& name) {
+  const mxArray *attr = getField(elem,name);
+  if (attr == nullptr)
+    throw string("The required attribute " + name + " is missing.");
+  return mxGetScalar(attr);
+}
+
+// Return the pointer given by mxGetDoubles for attribute `name` and write the
+// array dimensions to `shape`. A leading dimension of 1 is dropped, so a
+// 1 x n array is reported as {n}.
+// Throws a std::string message when the attribute is absent.
+double *MatlabInterface::getNativeDoubleArray(const std::string& name,std::vector<int64_t>& shape) {
+  mxArray *attr = getField(elem,name);
+  if (attr == nullptr)
+    throw string("The required attribute " + name + " is missing.");
+
+  const size_t rank = mxGetNumberOfDimensions(attr);
+  const mwSize *dims = mxGetDimensions(attr);
+  shape.assign(dims, dims + rank);
+
+  // Convert 1,x array to single dimension array
+  if( shape.front()==1 )
+    shape.erase(shape.begin());
+
+  return mxGetDoubles(attr);
+}
+
+// Always throws a std::string message naming the attribute; shape is left untouched.
+float *MatlabInterface::getNativeFloatArray(const std::string& name,std::vector<int64_t>& shape) {
+  const string message = name + ": float32 array not supported in MATLAB";
+  throw message;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
diff --git a/atgpu/MatlabInterface.h b/atgpu/MatlabInterface.h
@@ -0,0 +1,25 @@
+#ifndef AT_GPU_MATLABINTERFACE_H
+#define AT_GPU_MATLABINTERFACE_H
+#include "AbstractInterface.h"
+#include <mex.h>
+
+// AbstractInterface implementation whose accessors read fields of a MATLAB mxArray
+class MatlabInterface: public AbstractInterface {
+public:
+    // Select the MATLAB object the accessors below read from
+    void setObject(mxArray *obj);
+
+    // Attribute accessors (overrides of AbstractInterface)
+    std::string getString(const std::string& name) override;
+    int getInt(const std::string& name) override;
+    double getDouble(const std::string& name) override;
+    double *getNativeDoubleArray(const std::string& name,std::vector<int64_t>& shape) override;
+    float *getNativeFloatArray(const std::string& name,std::vector<int64_t>& shape) override;
+private:
+    // Current MATLAB object; stays null until setObject() is called
+    mxArray *elem = nullptr;
+    // Field lookup helper used by the accessors
+    mxArray *getField(const mxArray *pm, const std::string& name);
+};
+
+#endif //AT_GPU_MATLABINTERFACE_H
diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp
@@ -304,7 +304,7 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
 
   try {
 
-    cout << "Tracking " << num_particles << " particles on " << gpuLattice->getGPUContext()->name() << " #" << gpuId << endl;
+    //cout << "Tracking " << num_particles << " particles on " << gpuLattice->getGPUContext()->name() << " #" << gpuId << endl;
 
     npy_intp outdims[4] = {6,(npy_intp)(num_particles),num_refs,num_turns};
     PyObject *rout = PyArray_EMPTY(4, outdims, floatType, 1);
@@ -325,7 +325,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
       bool *xlostPtr = (bool *)PyArray_DATA((PyArrayObject *)xlost);
       AT_FLOAT *xlostcoordPtr = (AT_FLOAT *)PyArray_DATA((PyArrayObject *)xlostcoord);
 
-      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,xnturnPtr,xnelemPtr,xlostcoordPtr);
+      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,
+                      xnturnPtr,xnelemPtr,xlostcoordPtr,true);
 
       // Format result for AT
       for(uint32_t i=0;i<num_particles;i++) {
@@ -349,7 +350,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
 
     } else {
 
-      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,nullptr,nullptr,nullptr);
+      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,
+                      nullptr,nullptr,nullptr,true);
       return rout;
 
     }
diff --git a/atgpu/main.cpp b/atgpu/main.cpp
@@ -134,7 +134,7 @@ void integratorTest(int gpu,string latticeName) {
       // Choose an arc close to 1mm where unexpected tune drift is observed when step size in too small (EBS lattice)
       AT_FLOAT *rin = createArc(0.001,M_PI/2.0,-M_PI/2.0,nbPart);
 
-      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, nullptr, nullptr, nullptr, nullptr);
+      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, nullptr, nullptr, nullptr, nullptr, false);
 
       double err = 0;
       double max = 0;
@@ -222,7 +222,7 @@ void performanceTest(int gpu,string latticeName) {
       AT_FLOAT *lostAtCoord = new AT_FLOAT[nbPart * 6];
 
       t0 = AbstractGPU::get_ticks();
-      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, nbStride, starts, lostAtTurn, lostAtElem, lostAtCoord);
+      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, nbStride, starts, lostAtTurn, lostAtElem, lostAtCoord,false);
       t1 = AbstractGPU::get_ticks();
 
       //int pIdx = 0;
diff --git a/atmat/atmexall.m b/atmat/atmexall.m
@@ -9,6 +9,7 @@ function atmexall(varargin)
 %   -fail       Throw an exception if compiling any passmethod fails
 %               (By defaults compilation goes on)
 %	-openmp     Build the integrators for OpenMP parallelisation
+%	-cuda CUDA_PATH Build the GPU tracking support using Cuda
 %   -c_only     Do no compile C++ passmethods
 %   -DOMP_PARTICLE_THRESHOLD=n
 %               Set the parallelisation threshold to n particles
@@ -25,6 +26,7 @@ function atmexall(varargin)
 
 pdir=fullfile(fileparts(atroot),'atintegrators');
 [openmp,varargs]=getflag(varargin,'-openmp');
+[cuda,varargs]=getoption(varargs,'-cuda','None');
 [miss_only,varargs]=getflag(varargs,'-missing');
 [c_only,varargs]=getflag(varargs,'-c_only');
 [fail,varargs]=getflag(varargs,'-fail');
@@ -92,6 +94,49 @@ function atmexall(varargin)
 compile([alloptions, {passinclude}, LIBDL, ompoptions], fullfile(cdir,'atpass.c'));
 compile([atoptions, ompoptions],fullfile(cdir,'coptions.c'))
 
+% gpuextensions
+if ~strcmp(cuda,'None')
+    gpudir=fullfile(fileparts(atroot),'atgpu','');
+    if ispc()
+        % TODO
+        error('AT:atmexall', 'GPU windows not supported');
+    elseif ismac()
+        % TODO
+        error('AT:atmexall', 'GPU ismac not supported');
+    else
+        gpuflags = {sprintf('-I"%s"',gpudir),...
+                    sprintf('-I"%s/include"',cuda),...
+                    sprintf('-L"%s/lib64"',cuda),...
+                    sprintf('LDFLAGS=$LDFLAGS -Wl,-rpath,"%s/lib64"',cuda),...
+                    '-DCUDA'};
+    end
+    compile([alloptions, {passinclude}, gpuflags], ...
+        fullfile(cdir,'gpuinfo.cpp'),...
+        fullfile(gpudir,'MatlabInterface.cpp'), ...
+        fullfile(gpudir,'AbstractInterface.cpp'), ...
+        fullfile(gpudir,'CudaGPU.cpp'), ...
+        fullfile(gpudir,'AbstractGPU.cpp'), ...
+        '-lcuda','-lnvrtc');
+    compile([alloptions, {passinclude}, gpuflags], ...
+        fullfile(cdir,'gpupass.cpp'),...
+        fullfile(gpudir,'AbstractGPU.cpp'), ...
+        fullfile(gpudir,'CudaGPU.cpp'), ...
+        fullfile(gpudir,'AbstractInterface.cpp'), ...
+        fullfile(gpudir,'MatlabInterface.cpp'), ...
+        fullfile(gpudir,'Lattice.cpp'), ...
+        fullfile(gpudir,'PassMethodFactory.cpp'), ...
+        fullfile(gpudir,'SymplecticIntegrator.cpp'), ...
+        fullfile(gpudir,'IdentityPass.cpp'), ...
+        fullfile(gpudir,'DriftPass.cpp'), ...
+        fullfile(gpudir,'StrMPoleSymplectic4Pass.cpp'), ...
+        fullfile(gpudir,'BndMPoleSymplectic4Pass.cpp'), ...
+        fullfile(gpudir,'StrMPoleSymplectic4RadPass.cpp'), ...
+        fullfile(gpudir,'BndMPoleSymplectic4RadPass.cpp'), ...
+        fullfile(gpudir,'CavityPass.cpp'), ...
+        fullfile(gpudir,'RFCavityPass.cpp'), ...
+        '-lcuda','-lnvrtc');
+end
+
 [warnmess,warnid]=lastwarn; %#ok<ASGLU>
 if strcmp(warnid,'MATLAB:mex:GccVersion_link')
     warning('Disabling the compiler warning');
diff --git a/atmat/attrack/gpuinfo.cpp b/atmat/attrack/gpuinfo.cpp
@@ -0,0 +1,36 @@
+#include <mex.h>
+#include <MatlabInterface.h>
+#include <AbstractGPU.h>
+
+using namespace std;
+
+// Return GPU Info
+const char * gpuInfoNames[] = {"Name","Version","CoreNumber","Platform"};
+
+// MEX entry point: INFO = gpuinfo() returns a cell array holding one struct per
+// GPU device with the fields Name, Version, CoreNumber and Platform.
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
+
+  // nlhs==0 is accepted: MATLAB then assigns plhs[0] to 'ans'
+  if(nlhs > 1 || nrhs != 0)
+    mexErrMsgIdAndTxt("AT:WrongParameter","gpuinfo() takes no input and has only one output argument");
+
+  try {
+    vector<GPU_INFO> gpuInfos;
+    gpuInfos = AbstractGPU::getInstance()->getDeviceList();
+    const mwSize dims[] = {(mwSize)gpuInfos.size()};
+    plhs[0] = mxCreateCellArray(1,dims);
+    for(size_t i=0;i<gpuInfos.size();i++) {
+      mxArray *s = mxCreateStructMatrix(1, 1, 4, gpuInfoNames);
+      mxSetField(s, 0, gpuInfoNames[0], mxCreateString(gpuInfos[i].name.c_str()));
+      mxSetField(s, 0, gpuInfoNames[1], mxCreateString(gpuInfos[i].version.c_str()));
+      mxSetField(s, 0, gpuInfoNames[2], mxCreateDoubleScalar(gpuInfos[i].mpNumber));
+      mxSetField(s, 0, gpuInfoNames[3], mxCreateString(gpuInfos[i].platform.c_str()));
+      mxSetCell(plhs[0],i,s);
+    }
+  } catch (const string& errorStr) {
+    // Pass the text as an argument: it must not be interpreted as a printf format
+    mexErrMsgIdAndTxt("AT:Error","%s",errorStr.c_str());
+  }
+
+}
diff --git a/atmat/attrack/gpuinfo.m b/atmat/attrack/gpuinfo.m
@@ -0,0 +1,9 @@
+function varargout=gpuinfo() %#ok<STOUT>
+% INFO = gpuinfo()
+% INFO          cell array of n structures (one per GPU) with the following fields:
+%               Name: GPU name
+%               Version: CUDA compute capability (? for OpenCL)
+%               CoreNumber: Multi processor number
+%               Platform: Platform name
+error('at:missingMex','missing MEX file.');
+end
diff --git a/atmat/attrack/gpupass.cpp b/atmat/attrack/gpupass.cpp
diff --git a/atmat/attrack/gpupass.m b/atmat/attrack/gpupass.m
diff --git a/atmat/attrack/ringpass.m b/atmat/attrack/ringpass.m