diff options
author | V3n3RiX <venerix@redcorelinux.org> | 2020-04-12 03:41:30 +0100 |
---|---|---|
committer | V3n3RiX <venerix@redcorelinux.org> | 2020-04-12 03:41:30 +0100 |
commit | 623ee73d661e5ed8475cb264511f683407d87365 (patch) | |
tree | 993eb27c93ec7a2d2d19550300d888fc1fed9e69 /sci-chemistry/vmd | |
parent | ceeeb463cc1eef97fd62eaee8bf2196ba04bc384 (diff) |
gentoo Easter resync : 12.04.2020
Diffstat (limited to 'sci-chemistry/vmd')
-rw-r--r-- | sci-chemistry/vmd/Manifest | 2 | ||||
-rw-r--r-- | sci-chemistry/vmd/files/vmd-1.9.3-cuda.patch | 426 | ||||
-rw-r--r-- | sci-chemistry/vmd/vmd-1.9.3-r5.ebuild | 272 |
3 files changed, 700 insertions, 0 deletions
diff --git a/sci-chemistry/vmd/Manifest b/sci-chemistry/vmd/Manifest index 92abd3614dc7..1cd5411f26dc 100644 --- a/sci-chemistry/vmd/Manifest +++ b/sci-chemistry/vmd/Manifest @@ -1,6 +1,7 @@ AUX vmd-1.9.1-cuda-device_ptr.patch 894 BLAKE2B 96aeab7c2b9d29e5cd3558e69d1365d6b7dc2d78284bb1185d63a57300a83a4c7c0b8f1c1718027ff33333d34bba116d6898fbee74ab23dfd1a77c560160d659 SHA512 9a561773562873dad077f0f2903299952cb935c69ae4bd1463a5f79d6f27ae3845ac5a24ee295ff3569d9c65ae00a9fe3974f4ead003109246f97d130c2c48bf AUX vmd-1.9.2-format-security.patch 550 BLAKE2B d50d05d8697c345f5888b1344c827be1282c5b2e06cfddac022d562688550a728a91a943f9b31c4247f683dcd062ab8fe8ed360aa97a202f20af0467a3148cd3 SHA512 81a71ee4c4c438d18f3a393ebad5c9a08f1a7bb95fa58b78a8798955a34ae2494f80fa074b973a1b595862080e9103d21e509443742cfe2bc3cf77f10c0e3c9d AUX vmd-1.9.3-configure-libtachyon.patch 613 BLAKE2B 5717e941102c4d759d6985087f74724df21312a61313d397da1bbac1969b76590117c70bc3b7fac6c7a84483df433d03613b245a05db8bb10bf7b8807369f415 SHA512 1eafa38022588e9b9bba3a15394a432f66693e64ba62c27e7aebf759d279cb04326f76bcc6545ef34cbcbd7446aead1a05a18072e99aa7df0940f11173ab3c00 +AUX vmd-1.9.3-cuda.patch 15569 BLAKE2B a26ce893fd2808b7ce1ad36f6d2d690be482d24805f44030434fd3aadaa9f9a834ae8b73d7f1c759161e20041c2372181b90ae17f71eab50c94e6b38e3b133a3 SHA512 ba6f435601a53d5ed26ceab9ab86a9734e6a3b60f9cb61be5c9f59676a5145a4c64a538ddadf446e82608ade5a3d86ec6b8cb7257d9c402e245f845371feb571 AUX vmd-1.9.3-tmpdir.patch 673 BLAKE2B 074456a8a68a36f16a804aeb8caee9abf14505cb76a2661b03ad97119bf4afb13a994c9cf22eeb952a79a980fcf03bd3dee5c93cf63bf82a19831f71bcbd83c2 SHA512 f7f570c729259f9133fb6411047266312c3d5f802d4cffe316cfdc4094f0fd0830c3eaae6408f56ea17dadba283e1a1fdf86fec858a5211b1c00ed927f160ea4 DIST vmd-1.9.2-gentoo-patches.tar.xz 14896 BLAKE2B eb7c7fb982602f37bd8374bf8458a0a2ea2202161248caffd0d4e0f8124d627755304bb778976e1cf5598b6bfbbe5d8c8e6016c9ec562204d6782aaaf179b185 SHA512 0516b9e57b256ce822a64df420ebf3ab7391722757caeaa7e615526242043e9375fee8f4ee86fb4eb8de7a50d5cfc8f8d973ace5fa082e6f9010eb692cd6ee51 DIST vmd-1.9.2.src.tar.gz 34903084 BLAKE2B f7f7048ef7bb72a8b0dcae955c7f90691085158c38a59229d5b929a4b52d3148bdb2f40084cf8c4544f01cb8bcb57de1d98a90bd17568a07c4ebb8f6985cec9b SHA512 b232b743d4ea97dcf76e3005e1c8716d7138602edc078c4846026ab67759c8559af956182cb1acf3c85890db10b6f658ac9c269d432f2595612cf376d6d505ef @@ -9,5 +10,6 @@ DIST vmd-1.9.3.src.tar 129566720 BLAKE2B 848f3280d51765dcd18722dd233e0cdee3b7c0a EBUILD vmd-1.9.2.ebuild 7255 BLAKE2B d59ea537a43555104edd2effc973274abae483d739bf5051efce7c1ad15e131ff0156ce2129cf882c88194f359c69eb163571e23f47025ae26cc6f85ab4999a2 SHA512 bafeb7c0448920feaa77855d29cffd2e3d9df1994c528bb738ac01551d6eb5fea4d3e681f5d4c189366e8dff973c9f8c5633f18648c31026ce749041eb5358ee EBUILD vmd-1.9.3-r1.ebuild 7612 BLAKE2B 62e0208cbe8fac390d942885f1e12713a88dc29eac68aa708dacfa9a2c0c0eff94982795ddc3bce47c459e8153692978744efdf520518705db0892208761dd9d SHA512 4529071928c4b63ebc35d92ff9ef7713a38d49d5579cf4351a979806ae81eb9ce3e02c438a9ce127eba217039341ae2c53e74710aa0151d4e9b0db865458facb EBUILD vmd-1.9.3-r4.ebuild 7793 BLAKE2B 9f023e8efb5a095d2d6b46198a4137298d7e61c697e7f1eeb57ecfa5f1df7102840b9c42f4ceeb8c5052c59031d572a50eda0a2799a11432487b15a21e97eb5c SHA512 22326601bd6908b3495ebef7e9b45f3482dbd73c8b6bd54582d02e4721bcc483ef02ea67d8e582b59777cbfc923e4464a707281a06b6459de64a7f1e301d9268 +EBUILD vmd-1.9.3-r5.ebuild 7822 BLAKE2B 2a3933c1008dcde65b37846ae5cfc993036d387a9a62a4cb207865187a22e8cf41401000171d083e059abc17c43adf773490aafcfbc576480b241fd359f6fb88 SHA512 9abc8facc673387ee0fcc77b27e4f421f2c7cb590b451b5d8ce4165a66be27ad1b69a45d9c939718a4359f1c76591855219bdb5956947aa2408d61f338eb78ca EBUILD vmd-1.9.3.ebuild 7210 BLAKE2B a4c7a26b5c137c67adb5ea4b49acf0b8866a5b5a30840816a7c5a62c12355dfbb3bf17b2dfff39501c9d63e5bf3b479624c9186f7eaef65db455a9f29fe543a3 SHA512 fea1bf7019ae20f796be0c9fb9a5f912a491de16ecabeb6327b366120621c65401f8b4d32a575cf8d231b2c40d54f7c38537f3ce6a217856e1ec44fe49420e38 MISC metadata.xml 778 BLAKE2B 6c4202950f57a4d7a3004e966c761325a0a31ab19fe06a6bd69882bd685e41424953104ff653c03d6cbbcb60bb9aba45fbcec43cd228c4bb57fcb1c63a2bd881 SHA512 b967b71fb9f1b47a9d3b69e6958dfb0314a9f071dade7ed1876e91dfa73188748faf11ef832d240a71745b36a30422bde79de2e9ff8e5ec6b6da4b11b40d19c9 diff --git a/sci-chemistry/vmd/files/vmd-1.9.3-cuda.patch b/sci-chemistry/vmd/files/vmd-1.9.3-cuda.patch new file mode 100644 index 000000000000..258efb777caf --- /dev/null +++ b/sci-chemistry/vmd/files/vmd-1.9.3-cuda.patch @@ -0,0 +1,426 @@ +--- a/src/CUDAMarchingCubes.cu 2018-03-30 18:52:25.467189457 +0300 ++++ b/src/CUDAMarchingCubes.cu 2018-03-30 18:52:02.387136244 +0300 +@@ -10,7 +10,7 @@ + * + * $RCSfile: CUDAMarchingCubes.cu,v $ + * $Author: johns $ $Locker: $ $State: Exp $ +- * $Revision: 1.30 $ $Date: 2016/11/28 03:04:58 $ ++ * $Revision: 1.32 $ $Date: 2018/02/15 05:15:02 $ + * + *************************************************************************** + * DESCRIPTION: +@@ -25,14 +25,17 @@ + // + // Description: This class computes an isosurface for a given density grid + // using a CUDA Marching Cubes (MC) alorithm. +-// The implementation is based on the MC demo from the +-// Nvidia GPU Computing SDK, but has been improved +-// and extended. This implementation achieves higher +-// performance by reducing the number of temporary memory +-// buffers, reduces the number of scan calls by using vector +-// integer types, and allows extraction of per-vertex normals +-// optionally computes per-vertex colors if provided with a +-// volumetric texture map. ++// ++// The implementation is loosely based on the MC demo from ++// the Nvidia GPU Computing SDK, but the design has been ++// improved and extended in several ways. ++// ++// This implementation achieves higher performance ++// by reducing the number of temporary memory ++// buffers, reduces the number of scan calls by using ++// vector integer types, and allows extraction of ++// per-vertex normals and optionally computes ++// per-vertex colors if a volumetric texture map is provided. + // + // Author: Michael Krone <michael.krone@visus.uni-stuttgart.de> + // John Stone <johns@ks.uiuc.edu> +@@ -48,7 +51,7 @@ + #include <thrust/functional.h> + + // +-// Restrict macro to make it easy to do perf tuning tess ++// Restrict macro to make it easy to do perf tuning tests + // + #if 0 + #define RESTRICT __restrict__ +@@ -171,6 +174,11 @@ + texture<float, 3, cudaReadModeElementType> volumeTex; + + // sample volume data set at a point p, p CAN NEVER BE OUT OF BOUNDS ++// XXX The sampleVolume() call underperforms vs. peak memory bandwidth ++// because we don't strictly enforce coalescing requirements in the ++// layout of the input volume presently. If we forced X/Y dims to be ++// warp-multiple it would become possible to use wider fetches and ++// a few other tricks to improve global memory bandwidth + __device__ float sampleVolume(const float * RESTRICT data, + uint3 p, uint3 gridSize) { + return data[(p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x]; +@@ -592,6 +600,30 @@ + cudaBindTextureToArray(volumeTex, d_vol, desc); + } + ++#if CUDART_VERSION >= 9000 ++// ++// XXX CUDA 9.0RC breaks the usability of Thrust scan() prefix sums when ++// used with the built-in uint2 vector integer types. To workaround ++// the problem we have to define our own type and associated conversion ++// routines etc. ++// ++ ++// XXX workaround for uint2 breakage in CUDA 9.0RC ++struct myuint2 : uint2 { ++ __host__ __device__ myuint2() : uint2(make_uint2(0, 0)) {} ++ __host__ __device__ myuint2(int val) : uint2(make_uint2(val, val)) {} ++ __host__ __device__ myuint2(uint2 val) : uint2(make_uint2(val.x, val.y)) {} ++}; ++ ++void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) { ++ const uint2 zero = make_uint2(0, 0); ++ thrust::exclusive_scan(thrust::device_ptr<myuint2>((myuint2*)input), ++ thrust::device_ptr<myuint2>((myuint2*)input + numElements), ++ thrust::device_ptr<myuint2>((myuint2*)output), ++ (myuint2) zero); ++} ++ ++#else + + void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) { + const uint2 zero = make_uint2(0, 0); +@@ -601,6 +633,7 @@ + zero); + } + ++#endif + + void ThrustScanWrapperArea(float* output, float* input, unsigned int numElements) { + thrust::inclusive_scan(thrust::device_ptr<float>(input), +@@ -639,11 +672,9 @@ + } + + +-/////////////////////////////////////////////////////////////////////////////// + // + // class CUDAMarchingCubes + // +-/////////////////////////////////////////////////////////////////////////////// + + CUDAMarchingCubes::CUDAMarchingCubes() { + // initialize values +@@ -713,9 +744,6 @@ + } + + +-//////////////////////////////////////////////////////////////////////////////// +-//! Run the Cuda part of the computation +-//////////////////////////////////////////////////////////////////////////////// + void CUDAMarchingCubes::computeIsosurfaceVerts(float3* vertOut, unsigned int maxverts, dim3 & grid3) { + // check if data is available + if (!this->setdata) + +--- a/src/CUDAMDFF.cu 2016-12-01 10:11:56.000000000 +0300 ++++ b/src/CUDAMDFF.cu 2018-03-30 18:56:44.352937599 +0300 +@@ -11,7 +11,7 @@ + * + * $RCSfile: CUDAMDFF.cu,v $ + * $Author: johns $ $Locker: $ $State: Exp $ +- * $Revision: 1.75 $ $Date: 2015/04/07 20:41:26 $ ++ * $Revision: 1.78 $ $Date: 2018/02/19 07:10:37 $ + * + *************************************************************************** + * DESCRIPTION: +@@ -28,12 +28,16 @@ + #include <stdlib.h> + #include <string.h> + #include <cuda.h> +-#include <float.h> // FLT_MAX etc +- ++#if CUDART_VERSION >= 9000 ++#include <cuda_fp16.h> // need to explicitly include for CUDA 9.0 ++#endif + #if CUDART_VERSION < 4000 + #error The VMD MDFF feature requires CUDA 4.0 or later + #endif + ++#include <float.h> // FLT_MAX etc ++ ++ + #include "Inform.h" + #include "utilities.h" + #include "WKFThreads.h" +@@ -588,6 +592,43 @@ + } + + ++ ++// #define VMDUSESHUFFLE 1 ++#if defined(VMDUSESHUFFLE) && __CUDA_ARCH__ >= 300 && CUDART_VERSION >= 9000 ++// New warp shuffle-based CC sum reduction for Kepler and later GPUs. ++inline __device__ void cc_sumreduction(int tid, int totaltb, ++ float4 &total_cc_sums, ++ float &total_lcc, ++ int &total_lsize, ++ float4 *tb_cc_sums, ++ float *tb_lcc, ++ int *tb_lsize) { ++ total_cc_sums = make_float4(0.0f, 0.0f, 0.0f, 0.0f); ++ total_lcc = 0.0f; ++ total_lsize = 0; ++ ++ // use precisely one warp to do the final reduction ++ if (tid < warpSize) { ++ for (int i=tid; i<totaltb; i+=warpSize) { ++ total_cc_sums += tb_cc_sums[i]; ++ total_lcc += tb_lcc[i]; ++ total_lsize += tb_lsize[i]; ++ } ++ ++ // perform intra-warp parallel reduction... ++ // general loop version of parallel sum-reduction ++ for (int mask=warpSize/2; mask>0; mask>>=1) { ++ total_cc_sums.x += __shfl_xor_sync(0xffffffff, total_cc_sums.x, mask); ++ total_cc_sums.y += __shfl_xor_sync(0xffffffff, total_cc_sums.y, mask); ++ total_cc_sums.z += __shfl_xor_sync(0xffffffff, total_cc_sums.z, mask); ++ total_cc_sums.w += __shfl_xor_sync(0xffffffff, total_cc_sums.w, mask); ++ total_lcc += __shfl_xor_sync(0xffffffff, total_lcc, mask); ++ total_lsize += __shfl_xor_sync(0xffffffff, total_lsize, mask); ++ } ++ } ++} ++#else ++// shared memory based CC sum reduction + inline __device__ void cc_sumreduction(int tid, int totaltb, + float4 &total_cc_sums, + float &total_lcc, +@@ -629,6 +670,7 @@ + total_lcc = tb_lcc[0]; + total_lsize = tb_lsize[0]; + } ++#endif + + + inline __device__ void thread_cc_sum(float ref, float density, +@@ -750,6 +792,92 @@ + } + + ++#if defined(VMDUSESHUFFLE) && __CUDA_ARCH__ >= 300 && CUDART_VERSION >= 9000 ++ // all threads write their local sums to shared memory... ++ __shared__ float2 tb_cc_means_s[TOTALBLOCKSZ]; ++ __shared__ float2 tb_cc_squares_s[TOTALBLOCKSZ]; ++ __shared__ float tb_lcc_s[TOTALBLOCKSZ]; ++ __shared__ int tb_lsize_s[TOTALBLOCKSZ]; ++ ++ tb_cc_means_s[tid] = thread_cc_means; ++ tb_cc_squares_s[tid] = thread_cc_squares; ++ tb_lcc_s[tid] = thread_lcc; ++ tb_lsize_s[tid] = thread_lsize; ++ __syncthreads(); // all threads must hit syncthreads call... ++ ++ // use precisely one warp to do the thread-block-wide reduction ++ if (tid < warpSize) { ++ float2 tmp_cc_means = make_float2(0.0f, 0.0f); ++ float2 tmp_cc_squares = make_float2(0.0f, 0.0f); ++ float tmp_lcc = 0.0f; ++ int tmp_lsize = 0; ++ for (int i=tid; i<TOTALBLOCKSZ; i+=warpSize) { ++ tmp_cc_means += tb_cc_means_s[i]; ++ tmp_cc_squares += tb_cc_squares_s[i]; ++ tmp_lcc += tb_lcc_s[i]; ++ tmp_lsize += tb_lsize_s[i]; ++ } ++ ++ // perform intra-warp parallel reduction... ++ // general loop version of parallel sum-reduction ++ for (int mask=warpSize/2; mask>0; mask>>=1) { ++ tmp_cc_means.x += __shfl_xor_sync(0xffffffff, tmp_cc_means.x, mask); ++ tmp_cc_means.y += __shfl_xor_sync(0xffffffff, tmp_cc_means.y, mask); ++ tmp_cc_squares.x += __shfl_xor_sync(0xffffffff, tmp_cc_squares.x, mask); ++ tmp_cc_squares.y += __shfl_xor_sync(0xffffffff, tmp_cc_squares.y, mask); ++ tmp_lcc += __shfl_xor_sync(0xffffffff, tmp_lcc, mask); ++ tmp_lsize += __shfl_xor_sync(0xffffffff, tmp_lsize, mask); ++ } ++ ++ // write per-thread-block partial sums to global memory, ++ // if a per-thread-block CC output array is provided, write the ++ // local CC for this thread block out, and finally, check if we ++ // are the last thread block to finish, and finalize the overall ++ // CC results for the entire grid of thread blocks. ++ if (tid == 0) { ++ unsigned int bid = blockIdx.z * gridDim.x * gridDim.y + ++ blockIdx.y * gridDim.x + blockIdx.x; ++ ++ tb_cc_sums[bid] = make_float4(tmp_cc_means.x, tmp_cc_means.y, ++ tmp_cc_squares.x, tmp_cc_squares.y); ++ tb_lcc[bid] = tmp_lcc; ++ tb_lsize[bid] = tmp_lsize; ++ ++ if (tb_CC != NULL) { ++ float cc = calc_cc(tb_cc_means_s[0].x, tb_cc_means_s[0].y, ++ tb_cc_squares_s[0].x, tb_cc_squares_s[0].y, ++ tb_lsize_s[0], tb_lcc_s[0]); ++ ++ // write local per-thread-block CC to global memory ++ tb_CC[bid] = cc; ++ } ++ ++ __threadfence(); ++ ++ unsigned int value = atomicInc(&tbcatomic[0], totaltb); ++ isLastBlockDone = (value == (totaltb - 1)); ++ } ++ } ++ __syncthreads(); ++ ++ if (isLastBlockDone) { ++ float4 total_cc_sums; ++ float total_lcc; ++ int total_lsize; ++ cc_sumreduction(tid, totaltb, total_cc_sums, total_lcc, total_lsize, ++ tb_cc_sums, tb_lcc, tb_lsize); ++ ++ if (tid == 0) { ++ tb_cc_sums[totaltb] = total_cc_sums; ++ tb_lcc[totaltb] = total_lcc; ++ tb_lsize[totaltb] = total_lsize; ++ } ++ ++ reset_atomic_counter(&tbcatomic[0]); ++ } ++ ++#else ++ + // all threads write their local sums to shared memory... + __shared__ float2 tb_cc_means_s[TOTALBLOCKSZ]; + __shared__ float2 tb_cc_squares_s[TOTALBLOCKSZ]; +@@ -794,6 +922,7 @@ + } + __syncthreads(); // all threads must hit syncthreads call... + } ++//#endif + + // write per-thread-block partial sums to global memory, + // if a per-thread-block CC output array is provided, write the +@@ -847,6 +976,7 @@ + } + #endif + } ++#endif + } + + + +--- a/src/CUDAQuickSurf.cu 2016-12-01 10:11:56.000000000 +0300 ++++ b/src/CUDAQuickSurf.cu 2018-03-30 19:01:38.777196233 +0300 +@@ -11,7 +11,7 @@ + * + * $RCSfile: CUDAQuickSurf.cu,v $ + * $Author: johns $ $Locker: $ $State: Exp $ +- * $Revision: 1.81 $ $Date: 2016/04/20 04:57:46 $ ++ * $Revision: 1.84 $ $Date: 2018/02/15 04:59:15 $ + * + *************************************************************************** + * DESCRIPTION: +@@ -22,6 +22,9 @@ + #include <stdlib.h> + #include <string.h> + #include <cuda.h> ++#if CUDART_VERSION >= 9000 ++#include <cuda_fp16.h> // need to explicitly include for CUDA 9.0 ++#endif + + #if CUDART_VERSION < 4000 + #error The VMD QuickSurf feature requires CUDA 4.0 or later +@@ -130,14 +133,14 @@ + #define GUNROLL 1 + #endif + +-#if __CUDA_ARCH__ >= 300 + #define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ ) +-#define MINBLOCKDENS 1 ++#if __CUDA_ARCH__ >= 600 ++#define MINBLOCKDENS 16 ++#elif __CUDA_ARCH__ >= 300 ++#define MINBLOCKDENS 16 + #elif __CUDA_ARCH__ >= 200 +-#define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ ) + #define MINBLOCKDENS 1 + #else +-#define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ ) + #define MINBLOCKDENS 1 + #endif + +@@ -150,7 +153,7 @@ + // + template<class DENSITY, class VOLTEX> + __global__ static void +-// __launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS ) ++__launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS ) + gaussdensity_fast_tex_norm(int natoms, + const float4 * RESTRICT sorted_xyzr, + const float4 * RESTRICT sorted_color, +@@ -217,6 +220,8 @@ + for (yab=yabmin; yab<=yabmax; yab++) { + for (xab=xabmin; xab<=xabmax; xab++) { + int abcellidx = zab * acplanesz + yab * acncells.x + xab; ++ // this biggest latency hotspot in the kernel, if we could improve ++ // packing of the grid cell map, we'd likely improve performance + uint2 atomstartend = cellStartEnd[abcellidx]; + if (atomstartend.x != GRID_CELL_EMPTY) { + unsigned int atomid; +@@ -296,7 +301,7 @@ + + + __global__ static void +-// __launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS ) ++__launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS ) + gaussdensity_fast_tex3f(int natoms, + const float4 * RESTRICT sorted_xyzr, + const float4 * RESTRICT sorted_color, +@@ -363,6 +368,8 @@ + for (yab=yabmin; yab<=yabmax; yab++) { + for (xab=xabmin; xab<=xabmax; xab++) { + int abcellidx = zab * acplanesz + yab * acncells.x + xab; ++ // this biggest latency hotspot in the kernel, if we could improve ++ // packing of the grid cell map, we'd likely improve performance + uint2 atomstartend = cellStartEnd[abcellidx]; + if (atomstartend.x != GRID_CELL_EMPTY) { + unsigned int atomid; +@@ -550,7 +557,6 @@ + + // per-GPU handle with various memory buffer pointers, etc. + typedef struct { +- /// max grid sizes and attributes the current allocations will support + int verbose; + long int natoms; + int colorperatom; +@@ -561,18 +567,18 @@ + int gy; + int gz; + +- CUDAMarchingCubes *mc; ///< Marching cubes class used to extract surface ++ CUDAMarchingCubes *mc; + +- float *devdensity; ///< density map stored in GPU memory +- void *devvoltexmap; ///< volumetric texture map +- float4 *xyzr_d; ///< atom coords and radii +- float4 *sorted_xyzr_d; ///< cell-sorted coords and radii +- float4 *color_d; ///< colors +- float4 *sorted_color_d; ///< cell-sorted colors +- +- unsigned int *atomIndex_d; ///< cell index for each atom +- unsigned int *atomHash_d; ///< +- uint2 *cellStartEnd_d; ///< cell start/end indices ++ float *devdensity; ++ void *devvoltexmap; ++ float4 *xyzr_d; ++ float4 *sorted_xyzr_d; ++ float4 *color_d; ++ float4 *sorted_color_d; ++ ++ unsigned int *atomIndex_d; ++ unsigned int *atomHash_d; ++ uint2 *cellStartEnd_d; + + void *safety; + float3 *v3f_d; diff --git a/sci-chemistry/vmd/vmd-1.9.3-r5.ebuild b/sci-chemistry/vmd/vmd-1.9.3-r5.ebuild new file mode 100644 index 000000000000..a4e0e6db49b0 --- /dev/null +++ b/sci-chemistry/vmd/vmd-1.9.3-r5.ebuild @@ -0,0 +1,272 @@ +# Copyright 1999-2020 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 +PYTHON_COMPAT=( python2_7 ) + +inherit cuda desktop flag-o-matic prefix python-single-r1 toolchain-funcs xdg + +DESCRIPTION="Visual Molecular Dynamics" +HOMEPAGE="http://www.ks.uiuc.edu/Research/vmd/" +SRC_URI=" + https://dev.gentoo.org/~jlec/distfiles/${P}-gentoo-patches.tar.xz + ${P}.src.tar +" + +SLOT="0" +LICENSE="vmd" +KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux" +IUSE="cuda gromacs msms povray sqlite tachyon xinerama" +REQUIRED_USE="${PYTHON_REQUIRED_USE}" + +RESTRICT="fetch" + +# currently, tk-8.5* with USE=truetype breaks some +# tk apps such as Sequence Viewer or Timeline. +CDEPEND=" + >=dev-lang/tk-8.6.1:0= + dev-lang/perl + dev-libs/expat + $(python_gen_cond_dep ' + || ( + dev-python/numpy-python2[${PYTHON_MULTI_USEDEP}] + dev-python/numpy[${PYTHON_MULTI_USEDEP}] + ) + ') + sci-libs/netcdf:0= + virtual/opengl + >=x11-libs/fltk-1.1.10-r2:1 + x11-libs/libXft + x11-libs/libXi + ${PYTHON_DEPS} + cuda? ( >=dev-util/nvidia-cuda-toolkit-4.2.9-r1:= ) + gromacs? ( >=sci-chemistry/gromacs-5.0.4-r1:0=[tng] ) + sqlite? ( dev-db/sqlite:3= ) + tachyon? ( >=media-gfx/tachyon-0.99_beta6 ) + xinerama? ( x11-libs/libXinerama ) +" +DEPEND="${CDEPEND}" +BDEPEND=" + virtual/pkgconfig + dev-lang/swig +" +RDEPEND="${CDEPEND} + sci-biology/stride + sci-chemistry/chemical-mime-data + sci-chemistry/surf + x11-terms/xterm + msms? ( sci-chemistry/msms-bin ) + povray? ( media-gfx/povray ) +" + +VMD_DOWNLOAD="http://www.ks.uiuc.edu/Development/Download/download.cgi?PackageName=VMD" +# Binary only plugin!! +QA_PREBUILT="usr/lib*/vmd/plugins/LINUX/tcl/intersurf1.1/bin/intersurf.so" +QA_FLAGS_IGNORED_amd64=" usr/lib64/vmd/plugins/LINUX/tcl/volutil1.3/volutil" +QA_FLAGS_IGNORED_x86=" usr/lib/vmd/plugins/LINUX/tcl/volutil1.3/volutil" + +pkg_nofetch() { + elog "Please download ${P}.src.tar from" + elog "${VMD_DOWNLOAD}" + elog "after agreeing to the license and get" + elog "https://dev.gentoo.org/~jlec/distfiles/${P}-gentoo-patches.tar.xz" + elog "Place both into your DISTDIR directory" + elog + elog "Due to an upstream bug (https://bugs.gentoo.org/640440) sources" + elog "file may get downloaded as a compressed tarball or not. In that case" + elog "you will need to ensure you uncompress the file and rename it" + elog "as ${P}.src.tar" +} + +src_prepare() { + xdg_src_prepare + + use cuda && cuda_sanitize + + # Compat with newer CUDA versions (from Arch) + eapply "${FILESDIR}"/${P}-cuda.patch + + cd "${WORKDIR}"/plugins || die + + eapply -p2 "${WORKDIR}"/${P}-gentoo-plugins.patch + + [[ ${SILENT} == yes ]] || sed '/^.SILENT/d' -i $(find -name Makefile) + + sed \ + -e "s:CC = gcc:CC = $(tc-getCC):" \ + -e "s:CXX = g++:CXX = $(tc-getCXX):" \ + -e "s:COPTO =.*\":COPTO = -fPIC -o \":" \ + -e "s:LOPTO = .*\":LOPTO = ${LDFLAGS} -fPIC -o \":" \ + -e "s:CCFLAGS =.*\":CCFLAGS = ${CFLAGS}\":" \ + -e "s:CXXFLAGS =.*\":CXXFLAGS = ${CXXFLAGS}\":" \ + -e "s:SHLD = gcc:SHLD = $(tc-getCC) -shared:" \ + -e "s:SHXXLD = g++:SHXXLD = $(tc-getCXX) -shared:" \ + -e "s:-ltcl8.5:-ltcl:" \ + -i Make-arch || die "Failed to set up plugins Makefile" + + sed \ + -e '/^AR /s:=:?=:g' \ + -e '/^RANLIB /s:=:?=:g' \ + -i ../plugins/*/Makefile || die + + tc-export AR RANLIB + + sed \ + -e "s:\$(CXXFLAGS)::g" \ + -i hesstrans/Makefile || die + + # prepare vmd itself + cd "${S}" || die + + eapply -p2 "${WORKDIR}"/${P}-gentoo-base.patch + eapply "${FILESDIR}"/${P}-configure-libtachyon.patch + eapply "${FILESDIR}"/${P}-tmpdir.patch + + # PREFIX + sed \ + -e "s:/usr/include/:${EPREFIX}/usr/include:g" \ + -i configure || die + + sed \ + -e "s:gentoo-bindir:${ED}/usr/bin:g" \ + -e "s:gentoo-libdir:${ED}/usr/$(get_libdir):g" \ + -e "s:gentoo-opengl-include:${EPREFIX}/usr/include/GL:g" \ + -e "s:gentoo-opengl-libs:${EPREFIX}/usr/$(get_libdir):g" \ + -e "s:gentoo-gcc:$(tc-getCC):g" \ + -e "s:gentoo-g++:$(tc-getCXX):g" \ + -e "s:gentoo-nvcc:${EPREFIX}/opt/cuda/bin/nvcc:g" \ + -e "s:gentoo-cflags:${CFLAGS}:g" \ + -e "s:gentoo-cxxflags:${CXXFLAGS}:g" \ + -e "s:gentoo-nvflags::g" \ + -e "s:gentoo-ldflags:${LDFLAGS}:g" \ + -e "s:gentoo-plugindir:${WORKDIR}/plugins:g" \ + -e "s:gentoo-fltk-include:$(fltk-config --includedir):g" \ + -e "s:gentoo-fltk-libs:$(dirname $(fltk-config --libs)) -Wl,-rpath,$(dirname $(fltk-config --libs)):g" \ + -e "s:gentoo-libtachyon-include:${EPREFIX}/usr/include/tachyon:g" \ + -e "s:gentoo-libtachyon-libs:${EPREFIX}/usr/$(get_libdir):g" \ + -e "s:gentoo-netcdf-include:${EPREFIX}/usr/include:g" \ + -e "s:gentoo-netcdf-libs:${EPREFIX}/usr/$(get_libdir):g" \ + -i configure || die + + if use cuda; then + sed \ + -e "s:gentoo-cuda-lib:${EPREFIX}/opt/cuda/$(get_libdir):g" \ + -e "/NVCCFLAGS/s:=:= ${NVCCFLAGS}:g" \ + -i configure src/Makefile || die + sed \ + -e '/compute_/d' \ + -i configure || die + sed \ + -e 's:-gencode .*code=sm_..::' \ + -i src/Makefile || die + fi + + sed \ + -e "s:LINUXPPC:LINUX:g" \ + -e "s:LINUXALPHA:LINUX:g" \ + -e "s:LINUXAMD64:LINUX:g" \ + -e "s:gentoo-stride:${EPREFIX}/usr/bin/stride:g" \ + -e "s:gentoo-surf:${EPREFIX}/usr/bin/surf:g" \ + -e "s:gentoo-tachyon:${EPREFIX}/usr/bin/tachyon:g" \ + -i "${S}"/bin/vmd.sh || die "failed setting up vmd wrapper script" + + EMAKEOPTS=( + TCLINC="-I${EPREFIX}/usr/include" + TCLLIB="-L${EPREFIX}/usr/$(get_libdir)" + TCLLDFLAGS="-shared" + NETCDFLIB="$($(tc-getPKG_CONFIG) --libs-only-L netcdf)${EPREFIX}/usr/$(get_libdir)/libnetcdf.so" + NETCDFINC="$($(tc-getPKG_CONFIG) --cflags-only-I netcdf)${EPREFIX}/usr/include" + NETCDFLDFLAGS="$($(tc-getPKG_CONFIG) --libs netcdf)" + NETCDFDYNAMIC=1 + EXPATINC="-I${EPREFIX}/usr/include" + EXPATLIB="$($(tc-getPKG_CONFIG) --libs expat)" + EXPATLDFLAGS="-shared" + EXPATDYNAMIC=1 + ) + if use gromacs; then + EMAKEOPTS+=( + TNGLIB="$($(tc-getPKG_CONFIG) --libs libgromacs)" + TNGINC="-I${EPREFIX}/usr/include" + TNGLDFLAGS="-shared" + TNGDYNAMIC=1 + ) + fi + if use sqlite; then + EMAKEOPTS+=( + SQLITELIB="$($(tc-getPKG_CONFIG) --libs sqlite3)" + SQLITEINC="-I${EPREFIX}/usr/include" + SQLITELDFLAGS="-shared" + SQLITEDYNAMIC=1 + ) + fi +} + +src_configure() { + local myconf="OPENGL OPENGLPBUFFER COLVARS FLTK TK TCL PTHREADS PYTHON IMD NETCDF NUMPY NOSILENT XINPUT" + rm -f configure.options && echo $myconf >> configure.options + + use cuda && myconf+=" CUDA" +# use mpi && myconf+=" MPI" + use tachyon && myconf+=" LIBTACHYON" + use xinerama && myconf+=" XINERAMA" + + export \ + PYTHON_INCLUDE_DIR="$(python_get_includedir)" \ + PYTHON_LIBRARY_DIR="$(python_get_library_path)" \ + PYTHON_LIBRARY="$(python_get_LIBS)" \ + NUMPY_INCLUDE_DIR="$(python_get_sitedir)/numpy/core/include" \ + NUMPY_LIBRARY_DIR="$(python_get_sitedir)/numpy/core/include" + + perl ./configure LINUX \ + ${myconf} || die +} + +src_compile() { + # build plugins + cd "${WORKDIR}"/plugins || die + + emake \ + ${EMAKEOPTS[@]} \ + LINUX + + # build vmd + cd "${S}"/src || die + emake +} + +src_install() { + # install plugins + cd "${WORKDIR}"/plugins || die + emake \ + PLUGINDIR="${ED}/usr/$(get_libdir)/${PN}/plugins" \ + distrib + + # install vmd + cd "${S}"/src || die + emake install + + # install docs + cd "${S}" || die + dodoc Announcement README doc/ig.pdf doc/ug.pdf + + # remove some of the things we don't want and need in + # /usr/lib + cd "${ED}"/usr/$(get_libdir)/vmd || die + rm -fr doc README Announcement LICENSE || \ + die "failed to clean up /usr/lib/vmd directory" + + # adjust path in vmd wrapper + sed \ + -e "s:${ED}::" -i "${ED}"/usr/bin/${PN} \ + -e "/^defaultvmddir/s:^.*$:defaultvmddir=\"${EPREFIX}/usr/$(get_libdir)/${PN}\":g" \ + || die "failed to set up vmd wrapper script" + + # install icon and generate desktop entry + insinto /usr/share/pixmaps + doins "${WORKDIR}"/vmd.png + eprefixify "${WORKDIR}"/vmd.desktop + sed -i '/^Path/d' "${WORKDIR}"/vmd.desktop || die + # Open PDB files with VMD + echo "MimeType=chemical/x-pdb;" >> "${WORKDIR}"/vmd.desktop || die + domenu "${WORKDIR}"/vmd.desktop +} |