Select Git revision
nrf52-drv8825.ino
bonds_stress_strain2.cu 20.37 KiB
//
// bonds_stress_strain.cu
// input bonds, apply stress, output strain
// (c) MIT CBA Neil Gershenfeld 6/30/20
//
// includes
//
#include <iostream>
#include <string>
#include <cstring>
#include <chrono>
using namespace std;
//
// variables
//
class vars {
public:
float3 *hp,*hf; // host position, force
float3 *dp,*dv,*df; // device position, velocity, force
float *s,*ds; // strain
float d,dmax,sd; // particle pitch, max, sort
float step; // strain step size
float bond; // bond break multiplier
char load[100]; // load type
float percent; // zlimit
float cxmin,czmin,crmin,cxmax,czmax,crmax; // ycylinder
float lxmin,lzmin,lymin,lxmax,lzmax,lymax; // loadbox
float sxmin,szmin,symin,sxmax,szmax,symax; // supportbox
float stress,strain; // stress, strain accumulators
int lpp; // links per particle
float pxmin,pxmax,pymin,pymax,pzmin,pzmax; // particle limits
float xmin,xmax,ymin,ymax,zmin,zmax; // sort limits
float spring,mass,diss,dt; // integration
int nloop,grid,block; // update loop, GPU threads
int ms = 10; // maxsort
int np,nx,ny,nz; // number of particles, sort
int *dsort,*dnsort; // sort
int *links,*nlinks,*dlinks,*dnlinks; // links
char *type,*dtype; // particle type
// p:particle b:bottom t:top
};
vars v;
//
// assign particles
//
void assign_particles(vars v) {
if (strcmp(v.load,"zlimit") == 0) {
float zbot = v.pzmin+(v.pzmax-v.pzmin)*v.percent/100.0;
float ztop = v.pzmax-(v.pzmax-v.pzmin)*v.percent/100.0;
for (int i = 0; i < v.np; ++i){
if (v.hp[i].z < zbot){
v.type[i] = 'b';
}
else if (v.hp[i].z > ztop){
v.type[i] = 't';
}
else
v.type[i] = 'p';
}
}
else if (strcmp(v.load,"ycylinder") == 0) {
for (int i = 0; i < v.np; ++i){
float rmin = sqrt(pow(v.hp[i].z-v.czmin,2)
+pow(v.hp[i].x-v.cxmin,2));
float rmax = sqrt(pow(v.hp[i].z-v.czmax,2)
+pow(v.hp[i].x-v.cxmax,2));
if (rmin < v.crmin){
v.type[i] = 'b';
}
else if (rmax < v.crmax){
v.type[i] = 't';
}
else
v.type[i] = 'p';
}
}
else if (strcmp(v.load,"loadbox") == 0) {
// float zbot = v.pzmin+(v.pzmax-v.pzmin)*v.percent/100.0;
// float ztop = v.pzmax-(v.pzmax-v.pzmin)*v.percent/100.0;
cout << "v.pzmin:" <<v.pzmin<< endl;
cout << "v.pzmax:" <<v.pzmax<< endl;
for (int i = 0; i < v.np; ++i){
if (v.hp[i].z <= v.szmax &&v.hp[i].z >= v.szmin
&& v.hp[i].y <= v.symax &&v.hp[i].y >= v.symin
&& v.hp[i].x <= v.sxmax &&v.hp[i].x >= v.sxmin){
v.type[i] = 'b';
}
else if (v.hp[i].z <= v.lzmax &&v.hp[i].z >= v.lzmin
&& v.hp[i].y <= v.lymax &&v.hp[i].y >= v.lymin
&& v.hp[i].x <= v.lxmax &&v.hp[i].x >= v.lxmin){
v.type[i] = 't';
}
else
v.type[i] = 'p';
}
}
else {
cerr << "bonds_stress_strain: load type not defined" << endl;
exit(1);
}
}
//
// sort particles
//
__global__ void sort_particles(vars v) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
int nindex = gridDim.x*blockDim.x;
int start = (index/float(nindex))*v.np;
int stop = ((index+1)/float(nindex))*v.np;
for (int i = start; i < stop; ++i) {
int ix = (v.nx-1)*(v.dp[i].x-v.xmin)/(v.xmax-v.xmin);
if ((ix < 0) || (ix > v.nx-1)) continue;
int iy = (v.ny-1)*(v.dp[i].y-v.ymin)/(v.ymax-v.ymin);
if ((iy < 0) || (iy > v.ny-1)) continue;
int iz = (v.nz-1)*(v.dp[i].z-v.zmin)/(v.zmax-v.zmin);
if ((iz < 0) || (iz > v.nz-1)) continue;
int isort = v.nz*v.ny*ix+v.nz*iy+iz;
v.dsort[v.ms*isort+atomicAdd(&v.dnsort[isort],1)] = i;
}
}
//
// test for link
//
__device__ bool linked(int i,int j,vars v) {
for (int l = 0; l < v.dnlinks[i]; ++l) {
if (j == v.dlinks[v.lpp*i+l])
return true;
}
return false;
}
//
// find force
//
__device__ void find_force(int i,int j,vars v) {
float dx = v.dp[i].x-v.dp[j].x;
float dy = v.dp[i].y-v.dp[j].y;
float dz = v.dp[i].z-v.dp[j].z;
if (linked(i,j,v)) {
float d = sqrt(dx*dx+dy*dy+dz*dz);
float nx = dx/d;
float ny = dy/d;
float nz = dz/d;
float f = -v.spring*(d-v.d)/v.d;
float dvx = v.dv[i].x-v.dv[j].x;
float dvy = v.dv[i].y-v.dv[j].y;
float dvz = v.dv[i].z-v.dv[j].z;
f -= v.diss*(dvx*dx+dvy*dy+dvz*dz)/d;
atomicAdd(&(v.df[i].x),f*nx);
atomicAdd(&(v.df[i].y),f*ny);
atomicAdd(&(v.df[i].z),f*nz);
atomicAdd(&(v.df[j].x),-f*nx);
atomicAdd(&(v.df[j].y),-f*ny);
atomicAdd(&(v.df[j].z),-f*nz);
}
else {
float d = sqrt(dx*dx+dy*dy+dz*dz);
if (d < v.d) {
float nx = dx/d;
float ny = dy/d;
float nz = dz/d;
float f = -v.spring*(d-v.d)/v.d;
float dvx = v.dv[i].x-v.dv[j].x;
float dvy = v.dv[i].y-v.dv[j].y;
float dvz = v.dv[i].z-v.dv[j].z;
f -= v.diss*(dvx*dx+dvy*dy+dvz*dz)/d;
atomicAdd(&(v.df[i].x),f*nx);
atomicAdd(&(v.df[i].y),f*ny);
atomicAdd(&(v.df[i].z),f*nz);
atomicAdd(&(v.df[j].x),-f*nx);
atomicAdd(&(v.df[j].y),-f*ny);
atomicAdd(&(v.df[j].z),-f*nz);
}
}
}
//
// calculate forces
//
__global__ void calculate_forces(vars v) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
int nindex = gridDim.x*blockDim.x;
int start = (index/float(nindex))*v.np;
int stop = ((index+1)/float(nindex))*v.np;
for (int i = start; i < stop; ++i) {
float x = v.dp[i].x;
float y = v.dp[i].y;
float z = v.dp[i].z;
int ix = (v.nx-1)*(v.dp[i].x-v.xmin)/(v.xmax-v.xmin);
int iy = (v.ny-1)*(v.dp[i].y-v.ymin)/(v.ymax-v.ymin);
int iz = (v.nz-1)*(v.dp[i].z-v.zmin)/(v.zmax-v.zmin);
//
// loop over links to find strain
//
v.ds[i] = 0;
int norm = 0;
for (int l = 0; l < v.dnlinks[i]; ++l) {
int j = v.dlinks[v.lpp*i+l];
if (j == -1)
//
// ignore deleted link
//
continue;
float dx = x-v.dp[j].x;
float dy = y-v.dp[j].y;
float dz = z-v.dp[j].z;
float d = sqrtf(dx*dx+dy*dy+dz*dz);
//
// accumulate
//
v.ds[i] += abs(d-v.d)/v.d;
norm += 1;
//
// check for broken links
//
if (d > v.dmax)
//
// yes, mark for deletion
//
v.dlinks[v.lpp*i+l] = -1;
}
v.ds[i] = v.ds[i]/norm;
//
// loop over sort to find forces
//
for (int dx = -1; dx <= 1; ++dx) {
if (((ix+dx) < 0) || ((ix+dx) > (v.nx-1))) continue;
for (int dy = -1; dy <= 1; ++dy) {
if (((iy+dy) < 0) || ((iy+dy) > (v.ny-1))) continue;
for (int dz = -1; dz <= 1; ++dz) {
if (((iz+dz) < 0) || ((iz+dz) > (v.nz-1))) continue;
int isort = v.nz*v.ny*(ix+dx)+v.nz*(iy+dy)+(iz+dz);
for (int s = 0; s < v.dnsort[isort]; ++s) {
int j = v.dsort[v.ms*isort+s];
if (i < j)
find_force(i,j,v);
}
}
}
}
}
}
//
// integrate positions
//
__global__ void integrate_positions(vars v) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
int nindex = gridDim.x*blockDim.x;
int start = (index/float(nindex))*v.np;
int stop = ((index+1)/float(nindex))*v.np;
float ax,ay,az;
for (int i = start; i < stop; ++i) {
if (v.dtype[i] == 'b')
v.dp[i].z -= v.step;
else if (v.dtype[i] == 't')
v.dp[i].z += v.step;
else {
ax = v.df[i].x/v.mass;
ay = v.df[i].y/v.mass;
az = v.df[i].z/v.mass;
v.dv[i].x += v.dt*ax;
v.dv[i].y += v.dt*ay;
v.dv[i].z += v.dt*az;
v.dp[i].x += v.dt*v.dv[i].x;
v.dp[i].y += v.dt*v.dv[i].y;
v.dp[i].z += v.dt*v.dv[i].z;
}
}
}
//
// CUDA error check
//
void cudaCheck(string msg) {
cudaError err;
err = cudaGetLastError();
if (cudaSuccess != err)
cerr << msg << ": " << cudaGetErrorString(err) << endl;
}
//
// main
//
int main(int argc, char** argv) {
if (argc == 1) {
cerr << "command line: bonds source | bonds_stress_strain [arguments] | strain sink" << endl;
cerr << " grid=(grid size)" << endl;
cerr << " block=(block size)" << endl;
cerr << " spring=(spring constant)" << endl;
cerr << " mass=(particle mass)" << endl;
cerr << " dissipation=(damping)" << endl;
cerr << " dt=(time step)" << endl;
cerr << " loop=(steps between display)" << endl;
cerr << " step=(strain step)" << endl;
cerr << " bond=(bond breaking distance, lattice units)" << endl;
cerr << " load=(type)" << endl;
cerr << " zlimit" << endl;
cerr << " percent=(top and bottom z percent)" << endl;
cerr << " ycylinder" << endl;
cerr << " xmin=(bottom x)" << endl;
cerr << " zmin=(bottom z)" << endl;
cerr << " rmin=(bottom r)" << endl;
cerr << " xmax=(top x)" << endl;
cerr << " zmax=(top z)" << endl;
cerr << " rmax=(top r)" << endl;
return 1;
}
for (int i = 1; i < argc; ++i) {
if (strstr(argv[i],"grid=") != nullptr)
v.grid = stoi(argv[i]+5);
else if (strstr(argv[i],"block=") != nullptr)
v.block = stoi(argv[i]+6);
else if (strstr(argv[i],"spring=") != nullptr)
v.spring = stof(argv[i]+7);
else if (strstr(argv[i],"mass=") != nullptr)
v.mass = stof(argv[i]+5);
else if (strstr(argv[i],"dissipation=") != nullptr)
v.diss = stof(argv[i]+12);
else if (strstr(argv[i],"dt=") != nullptr)
v.dt = stof(argv[i]+3);
else if (strstr(argv[i],"loop=") != nullptr)
v.nloop = stoi(argv[i]+5);
else if (strstr(argv[i],"step=") != nullptr)
v.step = stof(argv[i]+5);
else if (strstr(argv[i],"bond=") != nullptr)
v.bond = stof(argv[i]+5);
else if (strstr(argv[i],"load=") != nullptr)
strcpy(v.load,argv[i]+5);
else if (strstr(argv[i],"lxmin=") != nullptr)
v.lxmin = stof(argv[i]+6);
else if (strstr(argv[i],"lymin=") != nullptr)
v.lymin = stof(argv[i]+6);
else if (strstr(argv[i],"lzmin=") != nullptr)
v.lzmin = stof(argv[i]+6);
else if (strstr(argv[i],"lxmax=") != nullptr)
v.lxmax = stof(argv[i]+6);
else if (strstr(argv[i],"lymax=") != nullptr)
v.lymax = stof(argv[i]+6);
else if (strstr(argv[i],"lzmax=") != nullptr)
v.lzmax = stof(argv[i]+6);
else if (strstr(argv[i],"sxmin=") != nullptr)
v.sxmin = stof(argv[i]+6);
else if (strstr(argv[i],"symin=") != nullptr)
v.symin = stof(argv[i]+6);
else if (strstr(argv[i],"szmin=") != nullptr)
v.szmin = stof(argv[i]+6);
else if (strstr(argv[i],"sxmax=") != nullptr)
v.sxmax = stof(argv[i]+6);
else if (strstr(argv[i],"symax=") != nullptr)
v.symax = stof(argv[i]+6);
else if (strstr(argv[i],"szmax=") != nullptr)
v.szmax = stof(argv[i]+6);
else if (strstr(argv[i],"percent=") != nullptr)
v.percent = stof(argv[i]+8);
else if (strstr(argv[i],"xmin=") != nullptr)
v.cxmin = stof(argv[i]+5);
else if (strstr(argv[i],"zmin=") != nullptr)
v.czmin = stof(argv[i]+5);
else if (strstr(argv[i],"rmin=") != nullptr)
v.crmin = stof(argv[i]+5);
else if (strstr(argv[i],"xmax=") != nullptr)
v.cxmax = stof(argv[i]+5);
else if (strstr(argv[i],"zmax=") != nullptr)
v.czmax = stof(argv[i]+5);
else if (strstr(argv[i],"rmax=") != nullptr)
v.crmax = stof(argv[i]+5);
else {
cerr << "bonds_stress_strain argument not recognized" << endl;
return 1;
}
}
//
// read and parse inputs
//
string s;
while (1) {
getline(cin,s);
if (s.compare("xmin:") == 0) {
getline(cin,s);
v.pxmin = stof(s);
}
else if (s.compare("xmax:") == 0) {
getline(cin,s);
v.pxmax = stof(s);
}
else if (s.compare("ymin:") == 0) {
getline(cin,s);
v.pymin = stof(s);
}
else if (s.compare("ymax:") == 0) {
getline(cin,s);
v.pymax = stof(s);
}
else if (s.compare("zmin:") == 0) {
getline(cin,s);
v.pzmin = stof(s);
}
else if (s.compare("zmax:") == 0) {
getline(cin,s);
v.pzmax = stof(s);
}
else if (s.compare("pitch:") == 0) {
getline(cin,s);
v.d = stof(s);
v.dmax = v.bond*v.d; // bond breaking
v.sd = 1.01*v.d; // sort bins bigger to avoid roundoff at edges
float xmid = (v.pxmax+v.pxmin)/2.0;
float ymid = (v.pymax+v.pymin)/2.0;
float zmid = (v.pzmax+v.pzmin)/2.0;
v.xmin = xmid+1.25*(v.pxmin-xmid); // enlarge sort volume
v.xmax = xmid+1.25*(v.pxmax-xmid);
v.ymin = ymid+1.25*(v.pymin-ymid);
v.ymax = ymid+1.25*(v.pymax-ymid);
v.zmin = zmid+1.25*(v.pzmin-zmid);
v.zmax = zmid+1.25*(v.pzmax-zmid);
v.nx = (v.xmax-v.xmin)/v.sd;
v.ny = (v.ymax-v.ymin)/v.sd;
v.nz = (v.zmax-v.zmin)/v.sd;
cudaMalloc(&v.dsort,v.nz*v.ny*v.nx*v.ms*sizeof(int));
cudaMalloc(&v.dnsort,v.nz*v.ny*v.nx*sizeof(int));
cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int));
cudaCheck("malloc pitch");
}
else if (s.compare("links per particle:") == 0) {
getline(cin,s);
v.lpp = stoi(s);
}
else if (s.compare("number:") == 0) {
getline(cin,s);
v.np = stoi(s); // number of particles
cudaMallocHost(&v.hp,v.np*sizeof(float3)); // host arrays
cudaMallocHost(&v.hf,v.np*sizeof(float3));
cudaMallocHost(&v.s,v.np*sizeof(float));
v.links = new int[v.lpp*v.np];
v.nlinks = new int[v.np];
v.type = new char[v.np];
cudaMalloc(&v.dp,v.np*sizeof(float3)); // device arrays
cudaMalloc(&v.dv,v.np*sizeof(float3));
cudaMalloc(&v.df,v.np*sizeof(float3));
cudaMalloc(&v.ds,v.np*sizeof(float));
cudaMalloc(&v.dlinks,v.lpp*v.np*sizeof(int));
cudaMalloc(&v.dnlinks,v.np*sizeof(int));
cudaMemset(v.dnlinks,0,v.np*sizeof(int));
cudaMalloc(&v.dtype,v.np*sizeof(char));
cudaCheck("malloc number");
}
else if (s.compare("particles:") == 0) {
for (int i = 0; i < v.np ; ++i) {
cin.read((char *)&v.hp[i].x,4);
cin.read((char *)&v.hp[i].y,4);
cin.read((char *)&v.hp[i].z,4);
}
}
else if (s.compare("nlinks:") == 0) {
for (int i = 0; i < v.np ; ++i)
cin.read((char *)&v.nlinks[i],4);
}
else if (s.compare("links:") == 0) {
for (int i = 0; i < v.lpp*v.np ; ++i)
cin.read((char *)&v.links[i],4);
}
else if (s.compare("end:") == 0)
break;
}
cerr << "bonds_stress_strain:" << endl;
cerr << " input " << v.np << " particles" << endl;
//
// assign particle types
//
assign_particles(v);
//
// copy data to GPU
//
cudaMemcpy(v.dp,v.hp,v.np*sizeof(float3),
cudaMemcpyHostToDevice);
cudaMemcpy(v.dnlinks,v.nlinks,v.np*sizeof(int),
cudaMemcpyHostToDevice);
cudaMemcpy(v.dlinks,v.links,v.lpp*v.np*sizeof(int),
cudaMemcpyHostToDevice);
cudaMemcpy(v.dtype,v.type,v.np*sizeof(char),
cudaMemcpyHostToDevice);
cudaCheck("copy to GPU");
//
// start output
//
cout << "type:" << endl;
cout << "bonds" << endl;
cout << "format:" << endl;
cout << "xyz float32" << endl;
cout << "pitch:" << endl;
cout << v.d << endl;
cout << "links per particle:" << endl;
cout << v.lpp << endl;
cout << "number:" << endl;
cout << v.np << endl;
cout << "xmin:" << endl;
cout << v.xmin << endl;
cout << "xmax:" << endl;
cout << v.xmax << endl;
cout << "ymin:" << endl;
cout << v.ymin << endl;
cout << "ymax:" << endl;
cout << v.ymax << endl;
cout << "zmin:" << endl;
cout << v.zmin << endl;
cout << "zmax:" << endl;
cout << v.zmax << endl;
cout << "particles:" << endl;
for (int i = 0; i < v.np; ++i) {
cout.write((char*)&v.hp[i].y,4);
cout.write((char*)&v.hp[i].y,4);
cout.write((char*)&v.hp[i].y,4);
}
cout << endl << "type:" << endl;
for (int i = 0; i < v.np; ++i)
cout.write(&v.type[i],1);
cout << endl << "continue:" << endl;
//
// start main loop
//
while (1) {
//
// update loop
//
cudaDeviceSynchronize();
auto t0 = chrono::high_resolution_clock::now();
cout << "outer loop" << endl;
for (int i = 0; i < v.nloop; ++i) {
cout << "inner loop" << endl;
//
// sort
//
cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int));
sort_particles<<<v.grid,v.block>>>(v);
cudaCheck("sort");
//
// forces
//
cudaMemset(v.df,0,v.np*sizeof(float3));
calculate_forces<<<v.grid,v.block>>>(v);
cudaCheck("forces");
//
// integrate
//
integrate_positions<<<v.grid,v.block>>>(v);
cudaCheck("integrate");
}
cudaDeviceSynchronize();
auto t1 = chrono::high_resolution_clock::now();
float dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
fprintf(stderr,"\r mps: %.0f",(v.nloop*v.np)/dt);
//
// measure timing
//
t0 = chrono::high_resolution_clock::now();
cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int));
sort_particles<<<v.grid,v.block>>>(v);
cudaCheck("time sort");
cudaDeviceSynchronize();
t1 = chrono::high_resolution_clock::now();
dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
cerr << " srt: " << dt;
//
t0 = chrono::high_resolution_clock::now();
cudaMemset(v.df,0,v.np*sizeof(float3));
calculate_forces<<<v.grid,v.block>>>(v);
cudaCheck("time forces");
cudaDeviceSynchronize();
t1 = chrono::high_resolution_clock::now();
dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
cerr << " frc: " << dt;
//
t0 = chrono::high_resolution_clock::now();
integrate_positions<<<v.grid,v.block>>>(v);
cudaCheck("time integrate");
cudaDeviceSynchronize();
t1 = chrono::high_resolution_clock::now();
dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
cerr << " int: " << dt;
//
// copy data to CPU
//
t0 = chrono::high_resolution_clock::now();
cudaMemcpy(v.hp,v.dp,v.np*sizeof(float3),cudaMemcpyDeviceToHost);
cudaMemcpy(v.hf,v.df,v.np*sizeof(float3),cudaMemcpyDeviceToHost);
cudaMemcpy(v.s,v.ds,v.np*sizeof(int),cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
cudaCheck("copy to CPU");
t1 = chrono::high_resolution_clock::now();
dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
cerr << " cpy: " << dt;
//
// stress
//
v.stress = 0;
for (int i = 0; i < v.np; ++i) {
if (v.type[i] == 'b')
v.stress -= v.hf[i].z;
else if (v.type[i] == 't')
v.stress += v.hf[i].z;
}
//cerr << endl << v.stress << endl;
//
// output
//
t0 = chrono::high_resolution_clock::now();
cout << "particles:" << endl;
for (int i = 0; i < v.np; ++i) {
cout.write((char*)&v.hp[i].x,4);
cout.write((char*)&v.hp[i].y,4);
cout.write((char*)&v.hp[i].z,4);
}
cout << endl << "strain:" << endl;
for (int i = 0; i < v.np; ++i)
cout.write((char*)&v.s[i],4);
cout << endl << "continue:" << endl;
t1 = chrono::high_resolution_clock::now();
dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
fprintf(stderr," out: %.3g ",dt);
//
cerr << "(us)" << flush;
}
}