Actual source code: petscdevice_hip.h
1: #pragma once
3: #include <petscdevice.h>
4: #include <petscpkg_version.h>
6: /* MANSEC = Sys */
7: /* SUBMANSEC = Device */
9: #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
10: #define PETSC_USING_HCC 1
11: #endif
13: #if PetscDefined(HAVE_HIP)
14: #include <hip/hip_runtime.h>
16: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
18: // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like
19: // error: no matching function for call to 'cupmBlasXdot'.
20: // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example.
21: // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc.
22: // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024).
23: //
24: // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes
25: // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations
26: #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0)
27: #define HIPBLAS_V2
28: #else
29: #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX
30: #endif
31: #include <hipblas/hipblas.h>
32: #include <hipsparse/hipsparse.h>
33: #else
34: #include <hipblas.h>
35: #include <hipsparse.h>
36: #endif
38: #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
39: #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
40: #endif
42: #if defined(__HIP_PLATFORM_NVCC__)
43: #include <cusolverDn.h>
44: #else // __HIP_PLATFORM_HCC__
45: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
46: #include <hipsolver/hipsolver.h>
47: #else
48: #include <hipsolver.h>
49: #endif
50: #endif // __HIP_PLATFORM_NVCC__
51: #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
53: /*MC
54: WaitForHIP - Block the calling host thread until all previously queued work on the current HIP device has completed
56: Synopsis:
57: #include <petscdevice_hip.h>
58: hipError_t WaitForHIP(void)
60: Not Collective; No Fortran Support
62: Level: developer
64: Note:
65: Thin convenience wrapper around `hipDeviceSynchronize()`. Marked for removal in favour of
66: explicit `PetscDeviceContext` synchronization.
68: .seealso: `PetscDeviceContext`, `PetscDeviceContextSynchronize()`, `WaitForCUDA()`
69: M*/
70: // REMOVE ME
71: #define WaitForHIP() hipDeviceSynchronize()
73: /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
74: PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
75: PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
76: PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
78: #define PetscCallHIP(...) \
79: do { \
80: const hipError_t _p_hip_err__ = __VA_ARGS__; \
81: if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
82: const char *name = hipGetErrorName(_p_hip_err__); \
83: const char *descr = hipGetErrorString(_p_hip_err__); \
84: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
85: } \
86: } while (0)
87: #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
89: #define PetscHIPCheckLaunch \
90: do { \
91: /* Check synchronous errors, i.e. pre-launch */ \
92: PetscCallHIP(hipGetLastError()); \
93: /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
94: PetscCallHIP(hipDeviceSynchronize()); \
95: } while (0)
97: #define PetscCallHIPBLAS(...) \
98: do { \
99: const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
100: if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
101: const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
102: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
103: } \
104: } while (0)
105: #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
107: #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
108: /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
109: #define PetscCallHIPSPARSE(...) \
110: do { \
111: const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
112: if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
113: const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
114: PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
115: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
116: } \
117: } while (0)
118: #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
120: #define PetscCallHIPSOLVER(...) \
121: do { \
122: const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
123: if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
124: const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
125: if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
126: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
127: "hipSolver error %d (%s). " \
128: "This indicates the GPU may have run out resources", \
129: (PetscErrorCode)_p_hipsolver_stat__, name); \
130: } else { \
131: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
132: } \
133: } \
134: } while (0)
135: #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
137: #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
138: /* hipSolver does not exist yet so we work around it
139: rocSOLVER users rocBLAS for the handle
140: * */
141: #if defined(__HIP_PLATFORM_NVCC__)
142: #include <cusolverDn.h>
143: /*MC
144: hipsolverHandle_t - Opaque handle to a hipSolver context used by PETSc to call into the hipSolver dense linear algebra library
146: Level: developer
148: Note:
149: On modern ROCm releases (>= 4.5) this is the native hipSolver handle. On older releases PETSc provides a compatibility shim that aliases the handle to `cusolverDnHandle_t` on the NVIDIA HIP platform or to `rocblas_handle` on the AMD HIP platform.
151: .seealso: `PetscHIPSOLVERGetHandle()`, `hipsolverStatus_t`, `PetscCallHIPSOLVER()`
152: M*/
153: typedef cusolverDnHandle_t hipsolverHandle_t;
155: /*MC
156: hipsolverStatus_t - Return-status enumeration used by hipSolver to report success or failure of a hipSolver call
158: Level: developer
160: Note:
161: On modern ROCm releases (>= 4.5) this is the native hipSolver status type. On older releases PETSc provides a compatibility shim that aliases the status to `cusolverStatus_t` on the NVIDIA HIP platform or to `rocblas_status` on the AMD HIP platform. Use `PetscCallHIPSOLVER()` to check the return value.
163: .seealso: `hipsolverHandle_t`, `PetscCallHIPSOLVER()`, `PetscHIPSolverGetErrorName()`
164: M*/
165: typedef cusolverStatus_t hipsolverStatus_t;
167: /* Alias hipsolverDestroy to cusolverDnDestroy */
168: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
169: {
170: return cusolverDnDestroy(hipsolverhandle);
171: }
173: /* Alias hipsolverCreate to cusolverDnCreate */
174: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
175: {
176: return cusolverDnCreate(hipsolverhandle);
177: }
179: /* Alias hipsolverGetStream to cusolverDnGetStream */
180: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
181: {
182: return cusolverDnGetStream(handle, stream);
183: }
185: /* Alias hipsolverSetStream to cusolverDnSetStream */
186: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
187: {
188: return cusolveDnSetStream(handle, stream);
189: }
190: #else /* __HIP_PLATFORM_HCC__ */
191: #include <rocsolver.h>
192: #include <rocblas.h>
193: typedef rocblas_handle hipsolverHandle_t;
194: typedef rocblas_status hipsolverStatus_t;
196: /* Alias hipsolverDestroy to rocblas_destroy_handle */
197: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
198: {
199: return rocblas_destroy_handle(hipsolverhandle);
200: }
202: /* Alias hipsolverCreate to rocblas_destroy_handle */
203: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
204: {
205: return rocblas_create_handle(hipsolverhandle);
206: }
208: // Alias hipsolverGetStream to rocblas_get_stream
209: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
210: {
211: return rocblas_get_stream(handle, stream);
212: }
214: // Alias hipsolverSetStream to rocblas_set_stream
215: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
216: {
217: return rocblas_set_stream(handle, stream);
218: }
219: #endif // __HIP_PLATFORM_NVCC__
220: #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
221: // REMOVE ME
222: PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc
223: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
224: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
225: PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
227: #endif // PETSC_HAVE_HIP
229: // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
230: // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
231: // would already be defined, but they would be empty since we cannot be using NVCC at the same
232: // time.
233: #if PetscDefined(USING_HCC)
234: #undef PETSC_HOST_DECL
235: #undef PETSC_DEVICE_DECL
236: #undef PETSC_KERNEL_DECL
237: #undef PETSC_SHAREDMEM_DECL
238: #undef PETSC_FORCEINLINE
239: #undef PETSC_CONSTMEM_DECL
241: #define PETSC_HOST_DECL __host__
242: #define PETSC_DEVICE_DECL __device__
243: #define PETSC_KERNEL_DECL __global__
244: #define PETSC_SHAREDMEM_DECL __shared__
245: #define PETSC_FORCEINLINE __forceinline__
246: #define PETSC_CONSTMEM_DECL __constant__
247: #endif
249: #if !defined(PETSC_HOST_DECL) // use HOST_DECL as canary
250: #define PETSC_HOST_DECL
251: #define PETSC_DEVICE_DECL
252: #define PETSC_KERNEL_DECL
253: #define PETSC_SHAREDMEM_DECL
254: #define PETSC_FORCEINLINE inline
255: #define PETSC_CONSTMEM_DECL
256: #endif
258: #if !defined(PETSC_DEVICE_DEFINED_DECLS_PRIVATE)
259: #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
260: #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
261: #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
262: #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
263: #endif