% Performing Image Semantic Segmentation on CamVid Data Using DeepLabV3+ Model with MobileNetV2 Backbone in MATLAB Environment
% --- Load and preview one training image ---
% Root folder holding the extracted CamVid dataset (Windows path).
% No trailing semicolon: value is echoed to the command window.
outputFolder = "C:\Anwar\CamVid\"
imgDir = fullfile(outputFolder,'train\');
% Datastore over all training images in the folder.
imds = imageDatastore(imgDir);
I = readimage(imds, 200);
% Histogram equalization to boost contrast for display purposes only.
I = histeq(I);
imshow(I)
function labelIDs = camvidPixelLabelIDs()
% Return the label IDs corresponding to each class.
%
% The CamVid dataset has 32 classes. Group them into 11 classes following
% the original SegNet training methodology [1].
%
% The 11 classes are:
% "Sky" "Building", "Pole", "Road", "Pavement", "Tree", "SignSymbol",
% "Fence", "Car", "Pedestrian", and "Bicyclist".
%
% CamVid pixel label IDs are provided as RGB color values. Group them into
% 11 classes and return them as a cell array of M-by-3 matrices. The
% original CamVid class names are listed alongside each RGB value. Note
% that the Other/Void class are excluded below.
%
% Output:
%   labelIDs - 11-by-1 cell array; element k is an M-by-3 matrix of the
%              RGB triplets grouped into class k. The cell order must
%              match the class-name order used by getClassNames.
labelIDs = { ...
% "Sky"
[
128 128 128; ... % "Sky"
]
% "Building"
[
000 128 064; ... % "Bridge"
128 000 000; ... % "Building"
064 192 000; ... % "Wall"
064 000 064; ... % "Tunnel"
192 000 128; ... % "Archway"
]
% "Pole"
[
192 192 128; ... % "Column_Pole"
000 000 064; ... % "TrafficCone"
]
% Road
[
128 064 128; ... % "Road"
128 000 192; ... % "LaneMkgsDriv"
192 000 064; ... % "LaneMkgsNonDriv"
]
% "Pavement"
[
000 000 192; ... % "Sidewalk"
064 192 128; ... % "ParkingBlock"
128 128 192; ... % "RoadShoulder"
]
% "Tree"
[
128 128 000; ... % "Tree"
192 192 000; ... % "VegetationMisc"
]
% "SignSymbol"
[
192 128 128; ... % "SignSymbol"
128 128 064; ... % "Misc_Text"
000 064 064; ... % "TrafficLight"
]
% "Fence"
[
064 064 128; ... % "Fence"
]
% "Car"
[
064 000 128; ... % "Car"
064 128 192; ... % "SUVPickupTruck"
192 128 192; ... % "Truck_Bus"
192 064 128; ... % "Train"
128 064 064; ... % "OtherMoving"
]
% "Pedestrian"
[
064 064 000; ... % "Pedestrian"
192 128 064; ... % "Child"
064 000 192; ... % "CartLuggagePram"
064 128 064; ... % "Animal"
]
% "Bicyclist"
[
000 128 192; ... % "Bicyclist"
192 000 192; ... % "MotorcycleScooter"
]
};
end
function classes = getClassNames()
% Return the 11 grouped CamVid class names as a string column vector.
%
% The order must match the grouping returned by camvidPixelLabelIDs.
names = ["Sky"; "Building"; "Pole"; "Road"; "Pavement"; "Tree"; ...
    "SignSymbol"; "Fence"; "Car"; "Pedestrian"; "Bicyclist"];
classes = names;
end
function pixelLabelColorbar(cmap, classNames)
% Add a colorbar to the current axes, labeling each color band with its
% segmentation class name.
%
% Inputs:
%   cmap       - N-by-3 colormap, one row per class.
%   classNames - N-element list of class names used as tick labels.
colormap(gca,cmap)
% Attach a colorbar to the current axes. The legacy colorbar('peer',ax)
% syntax is deprecated; pass the axes handle directly instead.
c = colorbar(gca);
% Use class names for tick marks.
c.TickLabels = classNames;
numClasses = size(cmap,1);
% Center each tick label within its color band.
c.Ticks = 1/(numClasses*2):1/numClasses:1;
% Remove the tick marks themselves.
c.TickLength = 0;
end
function cmap = camvidColorMap()
% Colormap for the 11 grouped CamVid classes: one RGB row per class,
% normalized to the [0, 1] range expected by MATLAB graphics.
rgb255 = [
    128 128 128   % Sky
    128   0   0   % Building
    192 192 192   % Pole
    128  64 128   % Road
     60  40 222   % Pavement
    128 128   0   % Tree
    192 128 128   % SignSymbol
     64  64 128   % Fence
     64   0 128   % Car
     64  64   0   % Pedestrian
      0 128 192   % Bicyclist
    ];
cmap = rgb255 / 255;
end
function [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = partitionCamVidData(imds,pxds)
% Split the CamVid image/label datastores into 60% training, 20%
% validation, and 20% test partitions using one shared random ordering,
% so image/label pairs stay aligned across all partitions.
%
% Inputs:
%   imds - imageDatastore of CamVid images.
%   pxds - pixelLabelDatastore aligned element-wise with imds.
%
% Seed the generator so the partition is reproducible.
rng(0);
total = numpartitions(imds);
order = randperm(total);
% 60/20/20 split boundaries.
nTrain = round(0.60 * total);
nVal = round(0.20 * total);
idxTrain = order(1:nTrain);
idxVal = order(nTrain+1:nTrain+nVal);
idxTest = order(nTrain+nVal+1:end);
% Apply the identical index sets to images and labels.
imdsTrain = subset(imds,idxTrain);
imdsVal = subset(imds,idxVal);
imdsTest = subset(imds,idxTest);
pxdsTrain = subset(pxds,idxTrain);
pxdsVal = subset(pxds,idxVal);
pxdsTest = subset(pxds,idxTest);
end
function data = augmentImageAndLabel(data, xTrans, yTrans)
% Augment images and pixel label images using random reflection and
% translation.
%
% Inputs:
%   data   - cell array with images in column 1 and pixel label images
%            in column 2 (one observation per row).
%   xTrans - [min max] horizontal translation range, in pixels.
%   yTrans - [min max] vertical translation range, in pixels.
% Output:
%   data   - same cell array with both columns warped in place.
for i = 1:size(data,1)
% Draw one random transform per observation so the image and its label
% image receive the identical warp.
tform = randomAffine2d(...
XReflection=true,...
XTranslation=xTrans, ...
YTranslation=yTrans);
% Center the view at the center of image in the output space while
% allowing translation to move the output image out of view.
rout = affineOutputView(size(data{i,1}), tform, BoundsStyle='centerOutput');
% Warp the image and pixel labels using the same transform.
% NOTE(review): presumably imwarp falls back to nearest-neighbor
% interpolation for the categorical label image — confirm.
data{i,1} = imwarp(data{i,1}, tform, OutputView=rout);
data{i,2} = imwarp(data{i,2}, tform, OutputView=rout);
end
end
function loss = modelLoss(Y,T,classWeights)
% Class-weighted cross-entropy loss that ignores undefined (NaN-encoded)
% pixels in the one-hot targets T.
%
% Inputs:
%   Y            - network predictions.
%   T            - one-hot targets; NaN marks undefined pixels.
%   classWeights - per-class weight vector, labeled "C" for crossentropy.
w = dlarray(classWeights,"C");
% Valid pixels are those whose target entries are defined.
validMask = ~isnan(T);
% Zero the NaNs so they cannot propagate; the mask excludes them anyway.
T(~validMask) = 0;
loss = crossentropy(Y,T,w,Mask=validMask, ...
    NormalizationFactor="mask-included");
end
% --- Build the network and preview segmentation on one image ---
classes = getClassNames()
imageSize = [720 960 3];
numClasses = 11;
% DeepLab v3+ with a MobileNet-v2 backbone (lightweight encoder).
net = deeplabv3plus(imageSize, numClasses, 'mobilenetv2');
I = imread("C:\Anwar\CamVid\train\0001TP_009210.png");
inputSize = net.Layers(1).InputSize;
I = imresize(I,inputSize(1:2));
% Pass Classes so the categorical result uses the CamVid class names;
% without it semanticseg defaults to C1..CN and the colorbar labels
% below would not correspond to the predicted categories. This also
% matches how semanticseg is called later in this script.
C = semanticseg(I,net,Classes=classes);
cmap = camvidColorMap;
B = labeloverlay(I,C,Colormap=cmap,Transparency=0.4);
figure
imshow(B)
pixelLabelColorbar(cmap, classes);
% --- Ground-truth labels and class statistics ---
labelIDs = camvidPixelLabelIDs();
labelDir = fullfile(outputFolder,"train_labels");
% Map the grouped RGB label IDs onto the 11 class names.
pxds = pixelLabelDatastore(labelDir,classes,labelIDs);
C = readimage(pxds,200);
cmap = camvidColorMap;
% Re-read the image that corresponds to label 200: the previous value of
% I held a different (resized) image, so overlaying C on it would pair
% the labels with the wrong picture (and mismatched dimensions).
I = readimage(imds,200);
B = labeloverlay(I,C,ColorMap=cmap);
imshow(B)
pixelLabelColorbar(cmap,classes);
% Per-class pixel counts over the whole label datastore (reused later
% for class weighting); no semicolon so the table is displayed.
tbl = countEachLabel(pxds)
frequency = tbl.PixelCount/sum(tbl.PixelCount);
% Visualize the (strong) class imbalance.
bar(1:numel(classes),frequency)
xticks(1:numel(classes))
xticklabels(tbl.Name)
xtickangle(45)
ylabel("Frequency")
% --- Validation data, augmentation, network, and class weights ---
valdir = fullfile(outputFolder, 'val\')
imdsVal = imageDatastore(valdir)
vallabelDir = fullfile(outputFolder,"val_labels\");
pxdsVal = pixelLabelDatastore(vallabelDir,classes,labelIDs);
% Pair each image with its pixel labels for trainnet.
dsTrain = combine(imds, pxds)
dsVal = combine(imdsVal, pxdsVal)
% Random translation range (pixels); augmentation applies to training
% data only — validation data stays untouched.
xTrans = [-10 10];
yTrans = [-10 10];
dsTrain = transform(dsTrain, @(data)augmentImageAndLabel(data,xTrans,yTrans));
imageSize = [720 960 3];
numClasses = numel(classes);
% NOTE(review): this re-creates the same untrained network already built
% above; the earlier instance is discarded.
net = deeplabv3plus(imageSize, numClasses, 'mobilenetv2');
% Median-frequency class balancing: weight each class by the median
% image frequency divided by its own frequency, countering imbalance.
imageFreq = tbl.PixelCount ./ tbl.ImagePixelCount;
classWeights = median(imageFreq) ./ imageFreq;
function iou = computeIoU(Y, T)
% Intersection-over-union metric for use with trainnet's Metrics option.
%
% trainnet calls a custom metric function as metric(Y,T), where Y is the
% network PREDICTION and T is the TARGET. The original code named the
% first argument YTrue and thresholded the second, i.e. it binarized the
% ground truth instead of the predictions. Fixed: threshold Y.
%
% Inputs:
%   Y - predicted scores/probabilities.
%   T - ground-truth targets (binary / one-hot).
% Output:
%   iou - scalar intersection-over-union over all elements.
% Binarize the predictions (assuming a single class for simplicity).
Y = Y > 0.5;
intersection = sum((Y & T), 'all');
union = sum((Y | T), 'all');
if union == 0
    iou = 1; % No positives in either prediction or target: perfect match.
else
    iou = intersection / union;
end
end
% --- Training configuration and training run ---
% SGDM with piecewise schedule: learning rate drops by 10x every 6
% epochs from 1e-2; early stop after 4 stalled validation evaluations.
% NOTE(review): computeIoU is a single-class IoU applied to multi-class
% one-hot outputs here — verify the metric values are meaningful.
options = trainingOptions("sgdm", ...
LearnRateSchedule="piecewise", ...
LearnRateDropPeriod=6, ...
LearnRateDropFactor=0.1, ...
Momentum=0.9, ...
InitialLearnRate=1e-2, ...
L2Regularization=0.005, ...
ValidationData=dsVal, ...
MaxEpochs=18, ...
MiniBatchSize=4, ...
Shuffle="every-epoch", ...
CheckpointPath="C:\Anwar\CamVid\", ...
VerboseFrequency=10, ...
ValidationPatience=4, ...
Metrics = { "accuracy", @computeIoU }, ...
Plots="training-progress"); % Enable training graphs
doTraining = true;
if doTraining
% Custom class-weighted loss; classWeights comes from the per-class
% pixel statistics computed above.
[net,info] = trainnet(dsTrain,net,@(Y,T) modelLoss(Y,T,classWeights),options);
end
% --- Evaluate the trained network on the held-out test set ---
testimgDir = fullfile(outputFolder,'test\');
imdsTest = imageDatastore(testimgDir);
testlabelDir = fullfile(outputFolder,"test_labels\");
pxdsTest = pixelLabelDatastore(testlabelDir,classes,labelIDs);
% Qualitative check on a single test image.
I = readimage(imdsTest,35);
C = semanticseg(I,net,Classes=classes);
B = labeloverlay(I,C,Colormap=cmap,Transparency=0.4);
imshow(B)
pixelLabelColorbar(cmap, classes);
% Side-by-side comparison of prediction vs. ground truth, both cast to
% uint8 label indices so imshowpair can highlight differences.
expectedResult = readimage(pxdsTest,35);
actual = uint8(C);
expected = uint8(expectedResult);
imshowpair(actual, expected)
% Per-class IoU (Jaccard index) for this single image.
iou = jaccard(C,expectedResult);
table(classes,iou)
% Segment the whole test set (results written to tempdir) and compute
% dataset- and class-level accuracy/IoU metrics.
pxdsResults = semanticseg(imdsTest,net, ...
Classes=classes, ...
MiniBatchSize=4, ...
WriteLocation=tempdir, ...
Verbose=false);
metrics = evaluateSemanticSegmentation(pxdsResults,pxdsTest,Verbose=false);
metrics.DataSetMetrics
metrics.ClassMetrics
% Combined datastore consumed by the manual mIoU computation below.
testData = combine(imdsTest, pxdsTest)
function meanIoU = computeTestmIoU(net, testData, classes)
% Compute the mean intersection-over-union of NET over TESTDATA,
% accumulating per-class intersections and unions across the whole set.
%
% Inputs:
%   net      - trained semantic segmentation network.
%   testData - combined datastore yielding {image, ground-truth mask}.
%   classes  - string/cellstr array of class names.
% Output:
%   meanIoU  - mean of per-class IoU values (NaN classes omitted).
numClasses = numel(classes);
intersection = zeros(numClasses, 1);
union = zeros(numClasses, 1);
% Reset the datastore before looping.
reset(testData);
while hasdata(testData)
    % Read one observation: {image, ground-truth categorical mask}.
    data = read(testData);
    img = data{1};
    trueMask = data{2};
    % Pass Classes so the predicted categories carry the CamVid class
    % names; without it semanticseg defaults to C1..CN and the ==
    % comparisons below would never match any class name.
    predMask = semanticseg(img, net, Classes=classes);
    for c = 1:numClasses
        % FIX: the original referenced the undefined variable
        % "classNames"; the parameter is named "classes".
        predClass = (predMask == classes(c));
        trueClass = (trueMask == classes(c));
        intersection(c) = intersection(c) + sum(predClass(:) & trueClass(:));
        union(c) = union(c) + sum(predClass(:) | trueClass(:));
    end
end
% Per-class IoU; eps avoids division by zero for classes absent from
% both predictions and ground truth.
IoU = intersection ./ (union + eps);
meanIoU = mean(IoU, 'omitnan');
fprintf('Overall Test mIoU: %.4f\n', meanIoU);
end
% Report the manually accumulated mean IoU over the full test set.
testmIoU = computeTestmIoU(net, testData, classes);