#include <mex.h>
#include <string.h>

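// Usage: Y = im2col(X,[s1 s2],overlap);
//
// Implements the image-patch extraction operator E(.): every s1-by-s2 patch
// of X is stacked into one column of Y. With a nonzero overlap flag all patch
// positions are used; otherwise the image is split into disjoint patches.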
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//
// This function is part of the DOLPHIn package (version 1.10)
// last modified: 02/06/2016, A. M. Tillmann
//
// You may freely use and modify the code for academic purposes, though we
// would appreciate if you could let us know (particularly should you find 
// a bug); if you use DOLPHIn for your own work, please cite the paper
//
//   "DOLPHIn -- Dictionary Learning for Phase Retrieval",
//   Andreas M. Tillmann, Yonina C. Eldar and Julien Mairal, 2016.
//   http://arxiv.org/abs/1602.02263
//
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

/// Creates an m x n real matrix whose element class corresponds to T.
/// Only declared here; the explicit specializations for double and float
/// that follow supply the actual definitions.
template <typename T>
inline mxArray* createMatrix(int m, int n);

/// Specialization for double: builds an m x n real matrix of class
/// mxDOUBLE_CLASS and returns the freshly created mxArray.
/// Both dimensions are converted to mwSize before the call.
template <> inline mxArray* createMatrix<double>(int m, int n) {
   const mwSize numRows = static_cast<mwSize>(m);
   const mwSize numCols = static_cast<mwSize>(n);
   return mxCreateNumericMatrix(numRows, numCols, mxDOUBLE_CLASS, mxREAL);
}

/// Specialization for float: builds an m x n real matrix of class
/// mxSINGLE_CLASS and returns the freshly created mxArray.
/// Both dimensions are converted to mwSize before the call.
template <> inline mxArray* createMatrix<float>(int m, int n) {
   const mwSize numRows = static_cast<mwSize>(m);
   const mwSize numCols = static_cast<mwSize>(n);
   return mxCreateNumericMatrix(numRows, numCols, mxSINGLE_CLASS, mxREAL);
}


/// Extracts s1-by-s2 patches from the image prhs[0] and stores each patch,
/// patch column after patch column, as one column of the new output plhs[0].
///
/// @tparam T   element type of the image data (the caller selects it from the
///             class of prhs[0]); the output is created with the same type.
/// @param plhs output list; plhs[0] receives the (s1*s2)-by-n patch matrix,
///             where n is the number of extracted patches.
/// @param prhs input list:
///             prhs[0]  image X; its first two dimensions h and w are used,
///             prhs[1]  patch size [s1 s2], stored as double or single
///                      (fractional values are truncated),
///             prhs[2]  scalar flag; nonzero extracts a patch at every
///                      position (step 1), zero tiles the image with
///                      disjoint patches (step s1 / s2).
///
/// Patches are enumerated with the row position running fastest. A patch
/// that does not fit into the image produces an output with zero columns.
/// Malformed size or flag arguments raise a MATLAB error via mexErrMsgTxt.
template <typename T>
inline void callFunction(mxArray* plhs[], const mxArray*prhs[]) {
   // mxGetData is the class-independent data accessor, so it is used for
   // both the double and the single instantiation.
   const T* X = static_cast<const T*>(mxGetData(prhs[0]));
   const mwSize* dimsX = mxGetDimensions(prhs[0]);
   const int h = static_cast<int>(dimsX[0]);
   const int w = static_cast<int>(dimsX[1]);

   // The patch size is decoded according to ITS OWN class, not the image's:
   // a default (double) [s1 s2] combined with a single image would otherwise
   // be reinterpreted as floats and yield garbage sizes.
   if (mxGetNumberOfElements(prhs[1]) < 2)
      mexErrMsgTxt("Patch size must have two elements [s1 s2]");
   double d1 = 0.0;
   double d2 = 0.0;
   switch (mxGetClassID(prhs[1])) {
      case mxDOUBLE_CLASS: {
         const double* S = static_cast<const double*>(mxGetData(prhs[1]));
         d1 = S[0];
         d2 = S[1];
         break;
      }
      case mxSINGLE_CLASS: {
         const float* S = static_cast<const float*>(mxGetData(prhs[1]));
         d1 = S[0];
         d2 = S[1];
         break;
      }
      default:
         mexErrMsgTxt("Patch size must be of class double or single");
   }
   // Written as !(x >= 1) so that NaN is rejected as well; a zero size would
   // otherwise cause a division by zero in the tiling case.
   if (!(d1 >= 1.0) || !(d2 >= 1.0))
      mexErrMsgTxt("Patch dimensions must be positive");
   const int s1 = static_cast<int>(d1);
   const int s2 = static_cast<int>(d2);

   if (mxIsEmpty(prhs[2]))
      mexErrMsgTxt("Overlap flag must be a scalar");
   const bool overlap = (mxGetScalar(prhs[2]) != 0.0);

   // Number of patch positions per direction. Clamped to zero when the patch
   // is larger than the image, so the count can never become negative.
   const int n1 = (h < s1) ? 0 : (overlap ? h - s1 + 1 : h / s1);
   const int n2 = (w < s2) ? 0 : (overlap ? w - s2 + 1 : w / s2);
   const int s = s1*s2;

   plhs[0] = createMatrix<T>(s, n1*n2);
   T* Y = static_cast<T*>(mxGetData(plhs[0]));

   // All offsets are computed in size_t: products such as num_patch*s can
   // exceed the range of int for large images.
   const size_t colLen = static_cast<size_t>(h);
   const size_t patchLen = static_cast<size_t>(s);
   const size_t patchRows = static_cast<size_t>(s1);
   const size_t rowStep = overlap ? 1 : static_cast<size_t>(s1);
   const size_t colStep = overlap ? 1 : static_cast<size_t>(s2);
   const size_t copyBytes = patchRows*sizeof(T);

   size_t num_patch = 0;
   for (int jj = 0; jj < n2; ++jj) {
      for (int ii = 0; ii < n1; ++ii) {
         // Top-left element of the current patch in X, and its column in Y.
         const T* src = X + static_cast<size_t>(jj)*colStep*colLen
                          + static_cast<size_t>(ii)*rowStep;
         T* dst = Y + num_patch*patchLen;
         // Copy the patch one image column (s1 contiguous elements) at a time.
         for (int kk = 0; kk < s2; ++kk) {
            memcpy(dst + static_cast<size_t>(kk)*patchRows,
                   src + static_cast<size_t>(kk)*colLen,
                   copyBytes);
         }
         ++num_patch;
      }
   }
}

void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
   if (nrhs != 3)
      mexErrMsgTxt("Bad number of inputs arguments");

   if (nlhs != 1)
      mexErrMsgTxt("Bad number of output arguments");

   if (mxGetClassID(prhs[0]) == mxDOUBLE_CLASS) {
      callFunction<double>(plhs,prhs);
   } else {
      callFunction<float>(plhs,prhs);
   }
}

