Hello,
I am also facing a similar problem.
I am using Mathematica 12.1 (I can't use 12.2 at the moment because my university's update procedure takes a while, so I have to work with 12.1), CUDA version 11.2, Windows Server 2019, and Visual Studio 2019.
I ran the following code:
Needs["CUDALink`"];
cudaActiveContourFunc = 
  CUDAFunctionLoad[src, 
   "CUDAActiveContour", {{ _Real, "Input"}, { _Real, 
     "Input"}, { _Real, "Input"}, { _Real, "Input"}, { _Real, 
     "Input"}, { _Real, "Input"}, { _Real, "Input"}, {_Real, 
     "Output"}, {_Real, 
     "Output"}, _Real, _Real, _Real, _Integer, _Integer, _Integer}, 
   256, "CompilerInstallation" -> 
    "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v" <> 
     ToString[DecimalForm[CUDAversion, {2, 1}]] <> "\\"];
It gave the following error:
CUDAFunctionLoad::invxpth: The "XCompilerInstallation" option set to $Failed is not valid. "XCompilerInstallation" must be a string pointing to the C compiler directory.
I ran the following commands and got the following output:
CUDAQ[]
o/p: true
CUDADriverVersion[]
o/p: 461.09
CUDAInformation[]
o/p: {1 -> {"Name" -> "GeForce GTX 1080 Ti", "Clock Rate" -> 1582000, 
   "Compute Capabilities" -> 6.1, "GPU Overlap" -> 1, 
   "Maximum Block Dimensions" -> {1024, 1024, 64}, 
   "Maximum Grid Dimensions" -> {2147483647, 65535, 65535}, 
   "Maximum Threads Per Block" -> 1024, 
   "Maximum Shared Memory Per Block" -> 49152, 
   "Total Constant Memory" -> 65536, "Warp Size" -> 32, 
   "Maximum Pitch" -> 2147483647, 
   "Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
    "Multiprocessor Count" -> 28, "Core Count" -> 3584, 
   "Execution Timeout" -> 1, "Integrated" -> False, 
   "Can Map Host Memory" -> True, "Compute Mode" -> "Default", 
   "Texture1D Width" -> 131072, "Texture2D Width" -> 131072, 
   "Texture2D Height" -> 65536, "Texture3D Width" -> 16384, 
   "Texture3D Height" -> 16384, "Texture3D Depth" -> 16384, 
   "Texture2D Array Width" -> 32768, 
   "Texture2D Array Height" -> 32768, 
   "Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512, 
   "Concurrent Kernels" -> True, "ECC Enabled" -> False, 
   "TCC Enabled" -> False, "Total Memory" -> 11811160064}, 
 2 -> {"Name" -> "GeForce GTX 1080 Ti", "Clock Rate" -> 1582000, 
   "Compute Capabilities" -> 6.1, "GPU Overlap" -> 1, 
   "Maximum Block Dimensions" -> {1024, 1024, 64}, 
   "Maximum Grid Dimensions" -> {2147483647, 65535, 65535}, 
   "Maximum Threads Per Block" -> 1024, 
   "Maximum Shared Memory Per Block" -> 49152, 
   "Total Constant Memory" -> 65536, "Warp Size" -> 32, 
   "Maximum Pitch" -> 2147483647, 
   "Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
    "Multiprocessor Count" -> 28, "Core Count" -> 3584, 
   "Execution Timeout" -> 1, "Integrated" -> False, 
   "Can Map Host Memory" -> True, "Compute Mode" -> "Default", 
   "Texture1D Width" -> 131072, "Texture2D Width" -> 131072, 
   "Texture2D Height" -> 65536, "Texture3D Width" -> 16384, 
   "Texture3D Height" -> 16384, "Texture3D Depth" -> 16384, 
   "Texture2D Array Width" -> 32768, 
   "Texture2D Array Height" -> 32768, 
   "Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512, 
   "Concurrent Kernels" -> True, "ECC Enabled" -> False, 
   "TCC Enabled" -> False, "Total Memory" -> 11811160064}, 
 3 -> {"Name" -> "Quadro K420", "Clock Rate" -> 0, 
   "Compute Capabilities" -> 3., "GPU Overlap" -> 1, 
   "Maximum Block Dimensions" -> {1024, 1024, 64}, 
   "Maximum Grid Dimensions" -> {2147483647, 65535, 65535}, 
   "Maximum Threads Per Block" -> 1024, 
   "Maximum Shared Memory Per Block" -> 49152, 
   "Total Constant Memory" -> 65536, "Warp Size" -> 32, 
   "Maximum Pitch" -> 2147483647, 
   "Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
    "Multiprocessor Count" -> 1, "Core Count" -> 192, 
   "Execution Timeout" -> 1, "Integrated" -> False, 
   "Can Map Host Memory" -> True, "Compute Mode" -> "Default", 
   "Texture1D Width" -> 65536, "Texture2D Width" -> 65536, 
   "Texture2D Height" -> 65536, "Texture3D Width" -> 4096, 
   "Texture3D Height" -> 4096, "Texture3D Depth" -> 4096, 
   "Texture2D Array Width" -> 16384, 
   "Texture2D Array Height" -> 16384, 
   "Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512, 
   "Concurrent Kernels" -> True, "ECC Enabled" -> False, 
   "TCC Enabled" -> False, "Total Memory" -> 2147483648}}
CCompilers[Full]
o/p: {{"Name" -> "Intel Compiler", 
  "Compiler" -> CCompilerDriver`IntelCompiler`IntelCompiler, 
  "CompilerInstallation" -> None, 
  "CompilerName" -> Automatic}, {"Name" -> "Generic C Compiler", 
  "Compiler" -> CCompilerDriver`GenericCCompiler`GenericCCompiler, 
  "CompilerInstallation" -> None, 
  "CompilerName" -> Automatic}, {"Name" -> "NVIDIA CUDA Compiler", 
  "Compiler" -> NVCCCompiler, 
  "CompilerInstallation" -> 
   "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2\\bin\
\\", "CompilerName" -> Automatic}}
cudaFun = 
 CUDAFunctionLoad[code, 
  "addTwo", {{_Integer, _, "Input"}, {_Integer, _, 
    "Output"}, _Integer}, 256]
o/p: 
CUDAFunctionLoad::invprog: CUDALink encountered an invalid program.
CUDAFunctionLoad[code, "addTwo", {{_Integer, _, 
   "Input"}, {_Integer, _, "Output"}, _Integer}, 256]
CUDAResourcesInformation[]
o/p: {{"Name" -> "CUDAResources", "Version" -> "12.1.0", 
  "WolframVersion" -> "12.1", "Qualifier" -> "Win64", 
  "SystemID" -> {"Windows-x86-64"}, 
  "Description" -> "{ToolkitVersion -> v10.2, MinimumDriver -> 290}", 
  "Category" -> Missing["NotAvailable"], 
  "Keywords" -> Missing["NotAvailable"], 
  "UUID" -> Missing["NotAvailable"], 
  "Creator" -> Missing["NotAvailable"], 
  "URL" -> Missing["NotAvailable"], "Internal" -> False, 
  "Context" -> {}, "Loading" -> Manual, "AutoUpdating" -> False, 
  "Enabled" -> True, 
  "Location" -> 
   "C:\\Users\\P70072599\\AppData\\Roaming\\Mathematica\\Paclets\\\
Repository\\CUDAResources-Win64-12.1.0", 
  "Hash" -> "3357678c60aa79e333fe04fbb5d04dd7"}}
CUDAResourcesInstall[]
o/p: {
PacletObject[
Association[
  "Name" -> "CUDAResources", "Version" -> "12.1.0", 
   "MathematicaVersion" -> "12.1", 
   "Description" -> "{ToolkitVersion -> v10.2, MinimumDriver -> 290}",
    "SystemID" -> {"Windows-x86-64"}, "Qualifier" -> "Win64", 
   "Extensions" -> {{
     "Resource", 
      "Resources" -> {
       "CUDAToolkit", "ExampleData", "LibraryResources"}}}, 
   "Location" -> "C:\\Users\\P70072599\\AppData\\Roaming\\Mathematica\
\\Paclets\\Repository\\CUDAResources-Win64-12.1.0"]]}
vec = Range[1., 10];
CUDAFourier[vec]
o/p:{17.3925 + 0. I, -1.58114 - 4.86624 I, -1.58114 - 
  2.17625 I, -1.58114 - 1.14876 I, -1.58114 - 0.513743 I, -1.58114 + 
  0. I, -1.58114 + 0.513743 I, -1.58114 + 1.14876 I, -1.58114 + 
  2.17625 I, -1.58114 + 4.86624 I}
InstallCUDA[]
o/p: InstallCUDA[]
CUDADot[Table[i, {i, 10}, {j, 10}], 
  Table[i, {i, 10}, {j, 10}]] // MatrixForm
o/p: {
 {55, 55, 55, 55, 55, 55, 55, 55, 55, 55},
 {110, 110, 110, 110, 110, 110, 110, 110, 110, 110},
 {165, 165, 165, 165, 165, 165, 165, 165, 165, 165},
 {220, 220, 220, 220, 220, 220, 220, 220, 220, 220},
 {275, 275, 275, 275, 275, 275, 275, 275, 275, 275},
 {330, 330, 330, 330, 330, 330, 330, 330, 330, 330},
 {385, 385, 385, 385, 385, 385, 385, 385, 385, 385},
 {440, 440, 440, 440, 440, 440, 440, 440, 440, 440},
 {495, 495, 495, 495, 495, 495, 495, 495, 495, 495},
 {550, 550, 550, 550, 550, 550, 550, 550, 550, 550}
}
Any help here would be greatly appreciated, as I have been stuck on this for quite some time.
Thank you!
Looking forward to your reply!