Created
January 28, 2026 19:17
-
-
Save lakshayg/488677fbcd027516692b019191ad12d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <cstdio> | |
| #include <vector> | |
| #include <cuda_runtime.h> | |
| #include <cusolverDn.h> | |
| #include <limits> | |
| #define CHECK_CUDA(call) \ | |
| do { \ | |
| cudaError_t err = call; \ | |
| if (err != cudaSuccess) { \ | |
| printf("CUDA error %s:%d: %s\n", \ | |
| __FILE__, __LINE__, cudaGetErrorString(err)); \ | |
| std::exit(EXIT_FAILURE); \ | |
| } \ | |
| } while (0) | |
| #define CHECK_CUSOLVER(call) \ | |
| do { \ | |
| cusolverStatus_t status = call; \ | |
| if (status != CUSOLVER_STATUS_SUCCESS) { \ | |
| printf("cuSOLVER error %s:%d\n", \ | |
| __FILE__, __LINE__); \ | |
| std::exit(EXIT_FAILURE); \ | |
| } \ | |
| } while (0) | |
| int main() { | |
| // Matrix size | |
| const int m = 3; | |
| const int n = 3; | |
| const int lda = m; | |
| // Host matrix (column-major) | |
| double h_A[lda * n] = { | |
| 2.0, std::numeric_limits<double>::quiet_NaN(), 1.0, | |
| 4.0, std::numeric_limits<double>::quiet_NaN(), 0.0, | |
| -2.0, std::numeric_limits<double>::quiet_NaN(), 2.0 | |
| }; | |
| // Device memory | |
| double* d_A = nullptr; | |
| int* d_Ipiv = nullptr; | |
| int* d_info = nullptr; | |
| CHECK_CUDA(cudaMalloc(&d_A, sizeof(double) * lda * n)); | |
| CHECK_CUDA(cudaMalloc(&d_Ipiv, sizeof(int) * std::min(m, n))); | |
| CHECK_CUDA(cudaMalloc(&d_info, sizeof(int))); | |
| CHECK_CUDA(cudaMemcpy(d_A, h_A, | |
| sizeof(double) * lda * n, | |
| cudaMemcpyHostToDevice)); | |
| // cuSOLVER handle | |
| cusolverDnHandle_t handle; | |
| CHECK_CUSOLVER(cusolverDnCreate(&handle)); | |
| // Query workspace size | |
| int lwork = 0; | |
| CHECK_CUSOLVER( | |
| cusolverDnDgetrf_bufferSize( | |
| handle, m, n, d_A, lda, &lwork)); | |
| double* d_work = nullptr; | |
| CHECK_CUDA(cudaMalloc(&d_work, sizeof(double) * lwork)); | |
| // LU factorization | |
| CHECK_CUSOLVER( | |
| cusolverDnDgetrf( | |
| handle, | |
| m, n, | |
| d_A, lda, | |
| d_work, | |
| d_Ipiv, | |
| d_info)); | |
| // Copy results back | |
| int h_info = 0; | |
| CHECK_CUDA(cudaMemcpy(h_A, d_A, | |
| sizeof(double) * lda * n, | |
| cudaMemcpyDeviceToHost)); | |
| CHECK_CUDA(cudaMemcpy(&h_info, d_info, | |
| sizeof(int), | |
| cudaMemcpyDeviceToHost)); | |
| if (h_info != 0) { | |
| printf("LU factorization failed: info = %d\n", h_info); | |
| return EXIT_FAILURE; | |
| } | |
| // Print LU matrix | |
| printf("LU (combined L and U):\n"); | |
| for (int i = 0; i < m; ++i) { | |
| for (int j = 0; j < n; ++j) { | |
| printf("%8.4f ", h_A[j * lda + i]); | |
| } | |
| printf("\n"); | |
| } | |
| // Cleanup | |
| cudaFree(d_A); | |
| cudaFree(d_Ipiv); | |
| cudaFree(d_info); | |
| cudaFree(d_work); | |
| cusolverDnDestroy(handle); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment