123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- /*
- Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
- Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- */
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- // K E R N E L F U N C T I O N S
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- // calculate position in uniform grid
- BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
- {
- int3 gridPos;
- gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
- gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
- gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
- return gridPos;
- } // bt3DGrid_calcGridPos()
- //----------------------------------------------------------------------------------------
- // calculate address in grid from position (clamping to edges)
- BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
- {
- gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
- gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
- gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
- return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
- } // bt3DGrid_calcGridHash()
- //----------------------------------------------------------------------------------------
- // calculate grid hash value for each body using its AABB
- BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- bt3DGrid3F1U bbMin = pAABB[index*2];
- bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
- float4 pos;
- pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
- pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
- pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
- // get address in grid
- int3 gridPos = bt3DGrid_calcGridPos(pos);
- uint gridHash = bt3DGrid_calcGridHash(gridPos);
- // store grid hash and body index
- pHash[index] = BT_GPU_make_uint2(gridHash, index);
- } // calcHashAABBD()
- //----------------------------------------------------------------------------------------
- BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- uint2 sortedData = pHash[index];
- // Load hash data into shared memory so that we can look
- // at neighboring body's hash value without loading
- // two hash values per thread
- BT_GPU___shared__ uint sharedHash[257];
- sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
- if((index > 0) && (BT_GPU_threadIdx.x == 0))
- {
- // first thread in block must load neighbor body hash
- volatile uint2 prevData = pHash[index-1];
- sharedHash[0] = prevData.x;
- }
- BT_GPU___syncthreads();
- if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
- {
- cellStart[sortedData.x] = index;
- }
- } // findCellStartD()
- //----------------------------------------------------------------------------------------
- BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
- {
- return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) &&
- (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) &&
- (min0.fz <= max1.fz)&& (min1.fz <= max0.fz);
- } // cudaTestAABBOverlap()
-
- //----------------------------------------------------------------------------------------
- BT_GPU___device__ void findPairsInCell( int3 gridPos,
- uint index,
- uint2* pHash,
- uint* pCellStart,
- bt3DGrid3F1U* pAABB,
- uint* pPairBuff,
- uint2* pPairBuffStartCurr,
- uint numBodies)
- {
- if ( (gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
- || (gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
- || (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1))
- {
- return;
- }
- uint gridHash = bt3DGrid_calcGridHash(gridPos);
- // get start of bucket for this cell
- uint bucketStart = pCellStart[gridHash];
- if (bucketStart == 0xffffffff)
- {
- return; // cell empty
- }
- // iterate over bodies in this cell
- uint2 sortedData = pHash[index];
- uint unsorted_indx = sortedData.y;
- bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
- bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
- uint handleIndex = min0.uw;
- uint2 start_curr = pPairBuffStartCurr[handleIndex];
- uint start = start_curr.x;
- uint curr = start_curr.y;
- uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
- uint curr_max = start_curr_next.x - start - 1;
- uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
- bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
- for(uint index2 = bucketStart; index2 < bucketEnd; index2++)
- {
- uint2 cellData = pHash[index2];
- if (cellData.x != gridHash)
- {
- break; // no longer in same bucket
- }
- uint unsorted_indx2 = cellData.y;
- if (unsorted_indx2 < unsorted_indx) // check not colliding with self
- {
- bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
- bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
- if(cudaTestAABBOverlap(min0, max0, min1, max1))
- {
- uint handleIndex2 = min1.uw;
- uint k;
- for(k = 0; k < curr; k++)
- {
- uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
- if(old_pair == handleIndex2)
- {
- pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
- break;
- }
- }
- if(k == curr)
- {
- if(curr >= curr_max)
- { // not a good solution, but let's avoid crash
- break;
- }
- pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
- curr++;
- }
- }
- }
- }
- pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
- return;
- } // findPairsInCell()
- //----------------------------------------------------------------------------------------
- BT_GPU___global__ void findOverlappingPairsD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
- uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- uint2 sortedData = pHash[index];
- uint unsorted_indx = sortedData.y;
- bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
- bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
- float4 pos;
- pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
- pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
- pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
- // get address in grid
- int3 gridPos = bt3DGrid_calcGridPos(pos);
- // examine only neighbouring cells
- for(int z=-1; z<=1; z++) {
- for(int y=-1; y<=1; y++) {
- for(int x=-1; x<=1; x++) {
- findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
- }
- }
- }
- } // findOverlappingPairsD()
- //----------------------------------------------------------------------------------------
- BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
- uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- uint2 sortedData = pHash[index];
- uint unsorted_indx = sortedData.y;
- bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
- bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
- uint handleIndex = min0.uw;
- uint2 start_curr = pPairBuffStartCurr[handleIndex];
- uint start = start_curr.x;
- uint curr = start_curr.y;
- uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
- uint curr_max = start_curr_next.x - start - 1;
- for(uint i = 0; i < numLarge; i++)
- {
- uint indx2 = numBodies + i;
- bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
- bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
- if(cudaTestAABBOverlap(min0, max0, min1, max1))
- {
- uint k;
- uint handleIndex2 = min1.uw;
- for(k = 0; k < curr; k++)
- {
- uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
- if(old_pair == handleIndex2)
- {
- pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
- break;
- }
- }
- if(k == curr)
- {
- pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
- if(curr >= curr_max)
- { // not a good solution, but let's avoid crash
- break;
- }
- curr++;
- }
- }
- }
- pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
- return;
- } // findPairsLargeD()
- //----------------------------------------------------------------------------------------
- BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
- uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- bt3DGrid3F1U bbMin = pAABB[index * 2];
- uint handleIndex = bbMin.uw;
- uint2 start_curr = pPairBuffStartCurr[handleIndex];
- uint start = start_curr.x;
- uint curr = start_curr.y;
- uint *pInp = pPairBuff + start;
- uint num_changes = 0;
- for(uint k = 0; k < curr; k++, pInp++)
- {
- if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
- {
- num_changes++;
- }
- }
- pPairScan[index+1] = num_changes;
- } // computePairCacheChangesD()
- //----------------------------------------------------------------------------------------
- BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
- uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
- {
- int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
- if(index >= (int)numBodies)
- {
- return;
- }
- bt3DGrid3F1U bbMin = pAABB[index * 2];
- uint handleIndex = bbMin.uw;
- uint2 start_curr = pPairBuffStartCurr[handleIndex];
- uint start = start_curr.x;
- uint curr = start_curr.y;
- uint* pInp = pPairBuff + start;
- uint* pOut = pPairOut + pPairScan[index];
- uint* pOut2 = pInp;
- uint num = 0;
- for(uint k = 0; k < curr; k++, pInp++)
- {
- if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
- {
- *pOut = *pInp;
- pOut++;
- }
- if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
- {
- *pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
- pOut2++;
- num++;
- }
- }
- pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);
- } // squeezeOverlappingPairBuffD()
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- // E N D O F K E R N E L F U N C T I O N S
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- //----------------------------------------------------------------------------------------
- extern "C"
- {
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
- {
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
- // execute the kernel
- BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
- // check if kernel invocation generated an error
- BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
- } // calcHashAABB()
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
- {
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
- BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
- BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
- BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
- } // findCellStart()
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies))
- {
- #if B_CUDA_USE_TEX
- BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
- #endif
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
- BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
- BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
- #if B_CUDA_USE_TEX
- BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
- #endif
- } // findOverlappingPairs()
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
- {
- #if B_CUDA_USE_TEX
- BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
- #endif
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
- BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
- BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
- #if B_CUDA_USE_TEX
- BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
- #endif
- } // findPairsLarge()
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
- {
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
- BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
- BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
- } // computePairCacheChanges()
- //----------------------------------------------------------------------------------------
- void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
- {
- int numThreads, numBlocks;
- BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
- BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
- BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
- } // btCuda_squeezeOverlappingPairBuff()
- //------------------------------------------------------------------------------------------------
- } // extern "C"
- //------------------------------------------------------------------------------------------------
- //------------------------------------------------------------------------------------------------
- //------------------------------------------------------------------------------------------------
|