Hello,
I am also facing a similar problem.
I am using Mathematica 12.1 (I can't use 12.2 at the moment because my university's update procedure takes a while, so I have to work with 12.1), CUDA version 11.2, Windows Server 2019, and Visual Studio 2019.
I ran the following code:
Needs["CUDALink`"];
cudaActiveContourFunc =
CUDAFunctionLoad[src,
"CUDAActiveContour", {{ _Real, "Input"}, { _Real,
"Input"}, { _Real, "Input"}, { _Real, "Input"}, { _Real,
"Input"}, { _Real, "Input"}, { _Real, "Input"}, {_Real,
"Output"}, {_Real,
"Output"}, _Real, _Real, _Real, _Integer, _Integer, _Integer},
256, "CompilerInstallation" ->
"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v" <>
ToString[DecimalForm[CUDAversion, {2, 1}]] <> "\\"];
This gave the following error:
CUDAFunctionLoad::invxpth: The "XCompilerInstallation" option set to $Failed is not valid. "XCompilerInstallation" must be a string pointing to the C compiler directory.
I ran the following commands and got the following output:
CUDAQ[]
o/p: true
CUDADriverVersion[]
o/p: 461.09
CUDAInformation[]
o/p: {1 -> {"Name" -> "GeForce GTX 1080 Ti", "Clock Rate" -> 1582000,
"Compute Capabilities" -> 6.1, "GPU Overlap" -> 1,
"Maximum Block Dimensions" -> {1024, 1024, 64},
"Maximum Grid Dimensions" -> {2147483647, 65535, 65535},
"Maximum Threads Per Block" -> 1024,
"Maximum Shared Memory Per Block" -> 49152,
"Total Constant Memory" -> 65536, "Warp Size" -> 32,
"Maximum Pitch" -> 2147483647,
"Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
"Multiprocessor Count" -> 28, "Core Count" -> 3584,
"Execution Timeout" -> 1, "Integrated" -> False,
"Can Map Host Memory" -> True, "Compute Mode" -> "Default",
"Texture1D Width" -> 131072, "Texture2D Width" -> 131072,
"Texture2D Height" -> 65536, "Texture3D Width" -> 16384,
"Texture3D Height" -> 16384, "Texture3D Depth" -> 16384,
"Texture2D Array Width" -> 32768,
"Texture2D Array Height" -> 32768,
"Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512,
"Concurrent Kernels" -> True, "ECC Enabled" -> False,
"TCC Enabled" -> False, "Total Memory" -> 11811160064},
2 -> {"Name" -> "GeForce GTX 1080 Ti", "Clock Rate" -> 1582000,
"Compute Capabilities" -> 6.1, "GPU Overlap" -> 1,
"Maximum Block Dimensions" -> {1024, 1024, 64},
"Maximum Grid Dimensions" -> {2147483647, 65535, 65535},
"Maximum Threads Per Block" -> 1024,
"Maximum Shared Memory Per Block" -> 49152,
"Total Constant Memory" -> 65536, "Warp Size" -> 32,
"Maximum Pitch" -> 2147483647,
"Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
"Multiprocessor Count" -> 28, "Core Count" -> 3584,
"Execution Timeout" -> 1, "Integrated" -> False,
"Can Map Host Memory" -> True, "Compute Mode" -> "Default",
"Texture1D Width" -> 131072, "Texture2D Width" -> 131072,
"Texture2D Height" -> 65536, "Texture3D Width" -> 16384,
"Texture3D Height" -> 16384, "Texture3D Depth" -> 16384,
"Texture2D Array Width" -> 32768,
"Texture2D Array Height" -> 32768,
"Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512,
"Concurrent Kernels" -> True, "ECC Enabled" -> False,
"TCC Enabled" -> False, "Total Memory" -> 11811160064},
3 -> {"Name" -> "Quadro K420", "Clock Rate" -> 0,
"Compute Capabilities" -> 3., "GPU Overlap" -> 1,
"Maximum Block Dimensions" -> {1024, 1024, 64},
"Maximum Grid Dimensions" -> {2147483647, 65535, 65535},
"Maximum Threads Per Block" -> 1024,
"Maximum Shared Memory Per Block" -> 49152,
"Total Constant Memory" -> 65536, "Warp Size" -> 32,
"Maximum Pitch" -> 2147483647,
"Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
"Multiprocessor Count" -> 1, "Core Count" -> 192,
"Execution Timeout" -> 1, "Integrated" -> False,
"Can Map Host Memory" -> True, "Compute Mode" -> "Default",
"Texture1D Width" -> 65536, "Texture2D Width" -> 65536,
"Texture2D Height" -> 65536, "Texture3D Width" -> 4096,
"Texture3D Height" -> 4096, "Texture3D Depth" -> 4096,
"Texture2D Array Width" -> 16384,
"Texture2D Array Height" -> 16384,
"Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512,
"Concurrent Kernels" -> True, "ECC Enabled" -> False,
"TCC Enabled" -> False, "Total Memory" -> 2147483648}}
CCompilers[Full]
o/p: {{"Name" -> "Intel Compiler",
"Compiler" -> CCompilerDriver`IntelCompiler`IntelCompiler,
"CompilerInstallation" -> None,
"CompilerName" -> Automatic}, {"Name" -> "Generic C Compiler",
"Compiler" -> CCompilerDriver`GenericCCompiler`GenericCCompiler,
"CompilerInstallation" -> None,
"CompilerName" -> Automatic}, {"Name" -> "NVIDIA CUDA Compiler",
"Compiler" -> NVCCCompiler,
"CompilerInstallation" ->
"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2\\bin\
\\", "CompilerName" -> Automatic}}
cudaFun =
CUDAFunctionLoad[code,
"addTwo", {{_Integer, _, "Input"}, {_Integer, _,
"Output"}, _Integer}, 256]
o/p:
CUDAFunctionLoad::invprog: CUDALink encountered an invalid program.
CUDAFunctionLoad[code, "addTwo", {{_Integer, _,
"Input"}, {_Integer, _, "Output"}, _Integer}, 256]
CUDAResourcesInformation[]
o/p: {{"Name" -> "CUDAResources", "Version" -> "12.1.0",
"WolframVersion" -> "12.1", "Qualifier" -> "Win64",
"SystemID" -> {"Windows-x86-64"},
"Description" -> "{ToolkitVersion -> v10.2, MinimumDriver -> 290}",
"Category" -> Missing["NotAvailable"],
"Keywords" -> Missing["NotAvailable"],
"UUID" -> Missing["NotAvailable"],
"Creator" -> Missing["NotAvailable"],
"URL" -> Missing["NotAvailable"], "Internal" -> False,
"Context" -> {}, "Loading" -> Manual, "AutoUpdating" -> False,
"Enabled" -> True,
"Location" ->
"C:\\Users\\P70072599\\AppData\\Roaming\\Mathematica\\Paclets\\\
Repository\\CUDAResources-Win64-12.1.0",
"Hash" -> "3357678c60aa79e333fe04fbb5d04dd7"}}
CUDAResourcesInstall[]
o/p: {
PacletObject[
Association[
"Name" -> "CUDAResources", "Version" -> "12.1.0",
"MathematicaVersion" -> "12.1",
"Description" -> "{ToolkitVersion -> v10.2, MinimumDriver -> 290}",
"SystemID" -> {"Windows-x86-64"}, "Qualifier" -> "Win64",
"Extensions" -> {{
"Resource",
"Resources" -> {
"CUDAToolkit", "ExampleData", "LibraryResources"}}},
"Location" -> "C:\\Users\\P70072599\\AppData\\Roaming\\Mathematica\
\\Paclets\\Repository\\CUDAResources-Win64-12.1.0"]]}
vec = Range[1., 10];
CUDAFourier[vec]
o/p:{17.3925 + 0. I, -1.58114 - 4.86624 I, -1.58114 -
2.17625 I, -1.58114 - 1.14876 I, -1.58114 - 0.513743 I, -1.58114 +
0. I, -1.58114 + 0.513743 I, -1.58114 + 1.14876 I, -1.58114 +
2.17625 I, -1.58114 + 4.86624 I}
InstallCUDA[]
o/p: InstallCUDA[]
CUDADot[Table[i, {i, 10}, {j, 10}],
Table[i, {i, 10}, {j, 10}]] // MatrixForm
o/p: {
{55, 55, 55, 55, 55, 55, 55, 55, 55, 55},
{110, 110, 110, 110, 110, 110, 110, 110, 110, 110},
{165, 165, 165, 165, 165, 165, 165, 165, 165, 165},
{220, 220, 220, 220, 220, 220, 220, 220, 220, 220},
{275, 275, 275, 275, 275, 275, 275, 275, 275, 275},
{330, 330, 330, 330, 330, 330, 330, 330, 330, 330},
{385, 385, 385, 385, 385, 385, 385, 385, 385, 385},
{440, 440, 440, 440, 440, 440, 440, 440, 440, 440},
{495, 495, 495, 495, 495, 495, 495, 495, 495, 495},
{550, 550, 550, 550, 550, 550, 550, 550, 550, 550}
}
Any help would be greatly appreciated, as I have been stuck on this for quite some time.
Thank you!
Looking forward to your reply!