On a Windows 10 PC with two GeForce GTX 660 cards and Mathematica 12.2, loading CUDALink and querying the devices gives:
(* Load the CUDALink package. *)
Needs["CUDALink`"]
(* Query the properties of the detected CUDA devices; the returned list is shown below. *)
CUDAInformation[]
{1 -> {"Name" -> "GeForce GTX 660", "Clock Rate" -> 888500,
"Compute Capabilities" -> 3., "GPU Overlap" -> 1,
"Maximum Block Dimensions" -> {1024, 1024, 64},
"Maximum Grid Dimensions" -> {2147483647, 65535, 65535},
"Maximum Threads Per Block" -> 1024,
"Maximum Shared Memory Per Block" -> 49152,
"Total Constant Memory" -> 65536, "Warp Size" -> 32,
"Maximum Pitch" -> 2147483647,
"Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
"Multiprocessor Count" -> 6, "Core Count" -> 1152,
"Execution Timeout" -> 1, "Integrated" -> False,
"Can Map Host Memory" -> True, "Compute Mode" -> "Default",
"Texture1D Width" -> 65536, "Texture2D Width" -> 65536,
"Texture2D Height" -> 65536, "Texture3D Width" -> 4096,
"Texture3D Height" -> 4096, "Texture3D Depth" -> 4096,
"Texture2D Array Width" -> 16384,
"Texture2D Array Height" -> 16384,
"Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512,
"Concurrent Kernels" -> True, "ECC Enabled" -> False,
"TCC Enabled" -> False, "Total Memory" -> 1610612736},
2 -> {"Name" -> "GeForce GTX 660", "Clock Rate" -> 888500,
"Compute Capabilities" -> 3., "GPU Overlap" -> 1,
"Maximum Block Dimensions" -> {1024, 1024, 64},
"Maximum Grid Dimensions" -> {2147483647, 65535, 65535},
"Maximum Threads Per Block" -> 1024,
"Maximum Shared Memory Per Block" -> 49152,
"Total Constant Memory" -> 65536, "Warp Size" -> 32,
"Maximum Pitch" -> 2147483647,
"Maximum Registers Per Block" -> 65536, "Texture Alignment" -> 512,
"Multiprocessor Count" -> 6, "Core Count" -> 1152,
"Execution Timeout" -> 1, "Integrated" -> False,
"Can Map Host Memory" -> True, "Compute Mode" -> "Default",
"Texture1D Width" -> 65536, "Texture2D Width" -> 65536,
"Texture2D Height" -> 65536, "Texture3D Width" -> 4096,
"Texture3D Height" -> 4096, "Texture3D Depth" -> 4096,
"Texture2D Array Width" -> 16384,
"Texture2D Array Height" -> 16384,
"Texture2D Array Slices" -> 2048, "Surface Alignment" -> 512,
"Concurrent Kernels" -> True, "ECC Enabled" -> False,
"TCC Enabled" -> False, "Total Memory" -> 1610612736}}
Then a small matrix product fails with a memory-allocation error:
(* Multiply a 10x10 matrix by itself with CUDADot and display the result as a matrix. *)
(* Each Table builds a matrix whose row i is filled with the value i. *)
CUDADot[Table[i, {i, 10}, {j, 10}],
Table[i, {i, 10}, {j, 10}]] // MatrixForm
CUDADot::allocf: A CUDALink memory allocation failed.
Also, the following American call option pricing example returns all zeros:
(* Number of options priced in a single call; every list below has this length. *)
numberOfOptions = 32;
(* Random per-option inputs drawn uniformly from the given ranges. *)
spotPrices = RandomReal[{25.0, 35.0}, numberOfOptions];
strikePrices = RandomReal[{20.0, 40.0}, numberOfOptions];
expiration = RandomReal[{0.1, 10.0}, numberOfOptions];
(* A single scalar interest rate is passed for all options. *)
interest = 0.08;
volatility = RandomReal[{0.10, 0.50}, numberOfOptions];
(* NOTE(review): the range is written as {0.2, 0.06}, larger bound first; possibly {0.02, 0.06} was intended. Confirm. *)
dividend = RandomReal[{0.2, 0.06}, numberOfOptions];
(* Price American call options from the lists above; the returned values are shown below. *)
CUDAFinancialDerivative[{"American",
"Call"}, {"StrikePrice" -> strikePrices,
"Expiration" -> expiration}, {"CurrentPrice" -> spotPrices,
"InterestRate" -> interest, "Volatility" -> volatility,
"Dividend" -> dividend}]
{0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., \
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}