Global Variables
(* Weight of the classification loss and number of image classes *)
$\[Alpha] = 1/300;
$numClasses = 4314;
Net Layers
conv[out_Integer, k_Integer, str_Integer, p_Integer] := ConvolutionLayer[out, k, "Stride" -> str, "PaddingSize" -> p]; (* Convolution layer *)
fc[n_Integer] := DotPlusLayer[n]; (* Fully connected layer; DotPlusLayer was renamed LinearLayer in later Mathematica versions *)
relu = ElementwiseLayer[Ramp]; (* Ramp activation function *)
\[Sigma] = ElementwiseLayer[LogisticSigmoid]; (* Sigmoid activation function *)
\[Sigma]1 = ElementwiseLayer[LogisticSigmoid]; (* Second instance, applied to the color target in the graph below *)
tl1 = ScalarTimesLayer[100]; (* Multiplies the input tensor elementwise by a fixed scalar; scales the a and b channels by 100 *)
tl2 = ScalarTimesLayer[100];
timesLoss = ScalarTimesLayer[$\[Alpha]]; (* Weights the classification loss by $\[Alpha] *)
bn = BatchNormalizationLayer[]; (* Batch Normalization layer *)
upSampl = UpsampleLayer[2]; (* Upsamples by a factor of 2 using nearest-neighbor interpolation *)
sl = SplitLayer[False]; (* This layer splits the input tensor into its channels *)
cl = CatenateLayer[]; (* This layer catenates the input tensors and outputs a new tensor *)
(* "Fusion" layer *)
rshL = ReshapeLayer[{256, 1, 1}]; (* This layer reinterprets the input to be an array of the specified dimensions *)
bl = BroadcastPlusLayer[]; (* This layer broadcasts a vector across the spatial dimensions of a tensor and adds the two *)
(* Loss functions *)
lossMS = MeanSquaredLossLayer[];
lossCE = CrossEntropyLossLayer["Index"];
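As a quick sanity check (a sketch on a dummy input, not part of the original pipeline), a kernel-3 convolution with stride 2 and padding 1 halves the spatial dimensions:
checkLayer = NetInitialize@NetChain[{conv[64, 3, 2, 1]}, "Input" -> {1, 224, 224}]; (* hypothetical helper *)
Dimensions[checkLayer[RandomReal[1, {1, 224, 224}]]] (* {64, 112, 112} *)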
Net Chains
(* Low-Level Features Network *)
lln = NetChain[{conv[64, 3, 2, 1], bn, relu, conv[128, 3, 1, 1], bn, relu, conv[128, 3, 2, 1], bn, relu, conv[256, 3, 1, 1], bn, relu,
conv[256, 3, 2, 1], bn, relu, conv[512, 3, 1, 1], bn, relu} ];
(* Mid-Level Features Network *)
mln = NetChain[{conv[512, 3, 1, 1], bn, relu, conv[256, 3, 1, 1], bn, relu}];
(* Colorization Network *)
coln = NetChain[{conv[256, 3, 1, 1], bn, relu, conv[128, 3, 1, 1], bn, relu, upSampl, conv[64, 3, 1, 1], bn, relu, conv[64, 3, 1, 1],
bn, relu, upSampl, conv[32, 3, 1, 1], bn, relu, conv[2, 3, 1, 1], \[Sigma], upSampl}];
(* Global Features Network *)
gln = NetChain[{conv[512, 3, 2, 1], bn, relu, conv[512, 3, 1, 1], bn, relu, conv[512, 3, 2, 1], bn, relu, conv[512, 3, 1, 1], bn, relu,
FlattenLayer[], fc[1024], bn, relu, fc[512], bn, relu}];
gln2 = NetChain[{fc[256], bn, relu}];
(* Classification Network *)
classn = NetChain[{fc[256], bn, relu, fc[$numClasses], SoftmaxLayer[]}]; (* SoftmaxLayer: CrossEntropyLossLayer["Index"] expects a probability vector *)
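The fusion step in the graph below relies on matching shapes: the mid-level path must emit a 256×28×28 tensor and the global path a 256-vector. A sketch (hypothetical helper nets on a dummy input) verifying this:
midCheck = NetInitialize@NetChain[{lln, mln}, "Input" -> {1, 224, 224}];
globCheck = NetInitialize@NetChain[{lln, gln, gln2}, "Input" -> {1, 224, 224}];
x = RandomReal[1, {1, 224, 224}];
Dimensions[midCheck[x]]  (* {256, 28, 28} *)
Dimensions[globCheck[x]] (* {256} *)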
Net Structure
classNet = NetGraph[
  <|"SplitL" -> sl, "LowLev" -> lln, "MidLev" -> mln, "GlobLev" -> gln,
    "GlobLev2" -> gln2, "ColNet" -> coln, "Sigmoid" -> \[Sigma]1,
    "TimesL1" -> tl1, "TimesL2" -> tl2, "CatL" -> cl, "LossMS" -> lossMS,
    "LossCE" -> lossCE, "Broadcast" -> bl, "ReshapeL" -> rshL,
    "ClassN" -> classn, "timesLoss" -> timesLoss|>,
  {(* Split the LAB input: the L channel drives the network; a and b (scaled and squashed) form the color target *)
   NetPort["Image"] -> "SplitL", "SplitL" -> {"LowLev", "TimesL1", "TimesL2"},
   {"TimesL1", "TimesL2"} -> "CatL", "CatL" -> "Sigmoid",
   (* Feature extraction; the reshaped global vector is broadcast onto the mid-level features *)
   "LowLev" -> "MidLev", "LowLev" -> "GlobLev", "GlobLev" -> "GlobLev2",
   "GlobLev" -> "ClassN", "MidLev" -> NetPort["Broadcast", "LHS"],
   "GlobLev2" -> "ReshapeL", "ReshapeL" -> NetPort["Broadcast", "RHS"],
   "Broadcast" -> "ColNet",
   (* Losses: mean-squared color loss and \[Alpha]-weighted classification loss *)
   "ColNet" -> NetPort["LossMS", "Input"], "Sigmoid" -> NetPort["LossMS", "Target"],
   "ClassN" -> NetPort["LossCE", "Input"], NetPort["Class"] -> NetPort["LossCE", "Target"],
   "LossCE" -> "timesLoss"},
  "Image" -> NetEncoder[{"Image", {224, 224}, "ColorSpace" -> "LAB", "Parallelize" -> False}]]
Training
tnet = NetTrain[
  classNet,
  <|"Image" -> $trainPathsFile, "Class" -> $trainClasses|>,
  ValidationSet -> <|"Image" -> $testPathsFile, "Class" -> $testClasses|>,
  TargetDevice -> {"GPU", 1},
  Method -> "ADAM"
]
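The data variables are defined elsewhere; a minimal sketch of their expected shape (hypothetical file layout and placeholder labels, since the real label source isn't shown):
$trainPathsFile = File /@ FileNames["*.jpg", "train", Infinity]; (* list of training image files *)
$trainClasses = RandomInteger[{1, $numClasses}, Length[$trainPathsFile]]; (* placeholder class indices in 1..$numClasses *)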
Evaluation Net
evalNet = Take[tnet, {"LowLev", "ColNet"}] (* subgraph from the low-level features to the predicted color channels *)
evalNet = NetChain[{evalNet},
  "Input" -> NetEncoder[{"Image", {224, 224}, "ColorSpace" -> "Grayscale"}]];
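To produce an actual color image, the predicted chrominance has to be mapped back through the inverse of the target transform above (scaling by 100, then LogisticSigmoid) and recombined with the input luminance. A sketch, with colorize as a hypothetical helper:
colorize[img_Image] := Module[{small, l, ab},
  small = ImageResize[img, {224, 224}];
  l = First@ColorSeparate@ColorConvert[small, "LAB"]; (* luminance channel *)
  ab = evalNet[small]; (* {2, 224, 224} array of predicted a, b values in (0, 1) *)
  ab = Log[ab/(1 - ab)]/100.; (* invert the sigmoid and the scaling by 100 *)
  ColorCombine[Prepend[Image /@ ab, l], "LAB"]]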