% Performing Image Semantic Segmentation on CamVid Data Using DeepLabV3+ Model with MobileNetV2 Backbone in MATLAB Environment
% --- Load and preview one training image ---
% Root folder holding the extracted CamVid dataset (Windows path).
% No trailing semicolon: value is echoed to the command window.
outputFolder = "C:\Anwar\CamVid\"
imgDir = fullfile(outputFolder,'train\');
% Datastore over all training images in the folder.
imds = imageDatastore(imgDir);
I = readimage(imds, 200);
% Histogram equalization to boost contrast for display purposes only.
I = histeq(I);
imshow(I)
function labelIDs = camvidPixelLabelIDs()
% Return the label IDs corresponding to each class.
%
% The CamVid dataset has 32 classes. Group them into 11 classes following
% the original SegNet training methodology [1].
%
% The 11 classes are:
% "Sky" "Building", "Pole", "Road", "Pavement", "Tree", "SignSymbol",
% "Fence", "Car", "Pedestrian", and "Bicyclist".
%
% CamVid pixel label IDs are provided as RGB color values. Group them into
% 11 classes and return them as a cell array of M-by-3 matrices. The
% original CamVid class names are listed alongside each RGB value. Note
% that the Other/Void class are excluded below.
%
% Output:
%   labelIDs - 11-by-1 cell array; element k is an M-by-3 matrix of the
%              RGB triplets grouped into class k. The cell order must
%              match the class-name order used by getClassNames.
labelIDs = { ...
% "Sky"
[
128 128 128; ... % "Sky"
]
% "Building"
[
000 128 064; ... % "Bridge"
128 000 000; ... % "Building"
064 192 000; ... % "Wall"
064 000 064; ... % "Tunnel"
192 000 128; ... % "Archway"
]
% "Pole"
[
192 192 128; ... % "Column_Pole"
000 000 064; ... % "TrafficCone"
]
% Road
[
128 064 128; ... % "Road"
128 000 192; ... % "LaneMkgsDriv"
192 000 064; ... % "LaneMkgsNonDriv"
]
% "Pavement"
[
000 000 192; ... % "Sidewalk"
064 192 128; ... % "ParkingBlock"
128 128 192; ... % "RoadShoulder"
]
% "Tree"
[
128 128 000; ... % "Tree"
192 192 000; ... % "VegetationMisc"
]
% "SignSymbol"
[
192 128 128; ... % "SignSymbol"
128 128 064; ... % "Misc_Text"
000 064 064; ... % "TrafficLight"
]
% "Fence"
[
064 064 128; ... % "Fence"
]
% "Car"
[
064 000 128; ... % "Car"
064 128 192; ... % "SUVPickupTruck"
192 128 192; ... % "Truck_Bus"
192 064 128; ... % "Train"
128 064 064; ... % "OtherMoving"
]
% "Pedestrian"
[
064 064 000; ... % "Pedestrian"
192 128 064; ... % "Child"
064 000 192; ... % "CartLuggagePram"
064 128 064; ... % "Animal"
]
% "Bicyclist"
[
000 128 192; ... % "Bicyclist"
192 000 192; ... % "MotorcycleScooter"
]
};
end
function classes = getClassNames()
% Return the 11 grouped CamVid class names as a string column vector.
%
% The order must match the grouping returned by camvidPixelLabelIDs.
names = ["Sky"; "Building"; "Pole"; "Road"; "Pavement"; "Tree"; ...
    "SignSymbol"; "Fence"; "Car"; "Pedestrian"; "Bicyclist"];
classes = names;
end
function pixelLabelColorbar(cmap, classNames)
% Add a colorbar to the current axes, labeling each color band with its
% segmentation class name.
%
% Inputs:
%   cmap       - N-by-3 colormap, one row per class.
%   classNames - N-element list of class names used as tick labels.
colormap(gca,cmap)
% Attach a colorbar to the current axes. The legacy colorbar('peer',ax)
% syntax is deprecated; pass the axes handle directly instead.
c = colorbar(gca);
% Use class names for tick marks.
c.TickLabels = classNames;
numClasses = size(cmap,1);
% Center each tick label within its color band.
c.Ticks = 1/(numClasses*2):1/numClasses:1;
% Remove the tick marks themselves.
c.TickLength = 0;
end
function cmap = camvidColorMap()
% Colormap for the 11 grouped CamVid classes: one RGB row per class,
% normalized to the [0, 1] range expected by MATLAB graphics.
rgb255 = [
    128 128 128   % Sky
    128   0   0   % Building
    192 192 192   % Pole
    128  64 128   % Road
     60  40 222   % Pavement
    128 128   0   % Tree
    192 128 128   % SignSymbol
     64  64 128   % Fence
     64   0 128   % Car
     64  64   0   % Pedestrian
      0 128 192   % Bicyclist
    ];
cmap = rgb255 / 255;
end
function [imdsTrain, imdsVal, imdsTest, pxdsTrain, pxdsVal, pxdsTest] = partitionCamVidData(imds,pxds)
% Split the CamVid image/label datastores into 60% training, 20%
% validation, and 20% test partitions using one shared random ordering,
% so image/label pairs stay aligned across all partitions.
%
% Inputs:
%   imds - imageDatastore of CamVid images.
%   pxds - pixelLabelDatastore aligned element-wise with imds.
%
% Seed the generator so the partition is reproducible.
rng(0);
total = numpartitions(imds);
order = randperm(total);
% 60/20/20 split boundaries.
nTrain = round(0.60 * total);
nVal = round(0.20 * total);
idxTrain = order(1:nTrain);
idxVal = order(nTrain+1:nTrain+nVal);
idxTest = order(nTrain+nVal+1:end);
% Apply the identical index sets to images and labels.
imdsTrain = subset(imds,idxTrain);
imdsVal = subset(imds,idxVal);
imdsTest = subset(imds,idxTest);
pxdsTrain = subset(pxds,idxTrain);
pxdsVal = subset(pxds,idxVal);
pxdsTest = subset(pxds,idxTest);
end
function data = augmentImageAndLabel(data, xTrans, yTrans)
% Augment images and pixel label images using random reflection and
% translation.
%
% Inputs:
%   data   - cell array with images in column 1 and pixel label images
%            in column 2 (one observation per row).
%   xTrans - [min max] horizontal translation range, in pixels.
%   yTrans - [min max] vertical translation range, in pixels.
% Output:
%   data   - same cell array with both columns warped in place.
for i = 1:size(data,1)
% Draw one random transform per observation so the image and its label
% image receive the identical warp.
tform = randomAffine2d(...
XReflection=true,...
XTranslation=xTrans, ...
YTranslation=yTrans);
% Center the view at the center of image in the output space while
% allowing translation to move the output image out of view.
rout = affineOutputView(size(data{i,1}), tform, BoundsStyle='centerOutput');
% Warp the image and pixel labels using the same transform.
% NOTE(review): presumably imwarp falls back to nearest-neighbor
% interpolation for the categorical label image — confirm.
data{i,1} = imwarp(data{i,1}, tform, OutputView=rout);
data{i,2} = imwarp(data{i,2}, tform, OutputView=rout);
end
end
function loss = modelLoss(Y,T,classWeights)
% Class-weighted cross-entropy loss that ignores undefined (NaN-encoded)
% pixels in the one-hot targets T.
%
% Inputs:
%   Y            - network predictions.
%   T            - one-hot targets; NaN marks undefined pixels.
%   classWeights - per-class weight vector, labeled "C" for crossentropy.
w = dlarray(classWeights,"C");
% Valid pixels are those whose target entries are defined.
validMask = ~isnan(T);
% Zero the NaNs so they cannot propagate; the mask excludes them anyway.
T(~validMask) = 0;
loss = crossentropy(Y,T,w,Mask=validMask, ...
    NormalizationFactor="mask-included");
end
% --- Build the network and preview segmentation on one image ---
classes = getClassNames()
imageSize = [720 960 3];
numClasses = 11;
% DeepLab v3+ with a MobileNet-v2 backbone (lightweight encoder).
net = deeplabv3plus(imageSize, numClasses, 'mobilenetv2');
I = imread("C:\Anwar\CamVid\train\0001TP_009210.png");
inputSize = net.Layers(1).InputSize;
I = imresize(I,inputSize(1:2));
% Pass Classes so the categorical result uses the CamVid class names;
% without it semanticseg defaults to C1..CN and the colorbar labels
% below would not correspond to the predicted categories. This also
% matches how semanticseg is called later in this script.
C = semanticseg(I,net,Classes=classes);
cmap = camvidColorMap;
B = labeloverlay(I,C,Colormap=cmap,Transparency=0.4);
figure
imshow(B)
pixelLabelColorbar(cmap, classes);
% --- Ground-truth labels and class statistics ---
labelIDs = camvidPixelLabelIDs();
labelDir = fullfile(outputFolder,"train_labels");
% Map the grouped RGB label IDs onto the 11 class names.
pxds = pixelLabelDatastore(labelDir,classes,labelIDs);
C = readimage(pxds,200);
cmap = camvidColorMap;
% Re-read the image that corresponds to label 200: the previous value of
% I held a different (resized) image, so overlaying C on it would pair
% the labels with the wrong picture (and mismatched dimensions).
I = readimage(imds,200);
B = labeloverlay(I,C,ColorMap=cmap);
imshow(B)
pixelLabelColorbar(cmap,classes);
% Per-class pixel counts over the whole label datastore (reused later
% for class weighting); no semicolon so the table is displayed.
tbl = countEachLabel(pxds)
frequency = tbl.PixelCount/sum(tbl.PixelCount);
% Visualize the (strong) class imbalance.
bar(1:numel(classes),frequency)
xticks(1:numel(classes))
xticklabels(tbl.Name)
xtickangle(45)
ylabel("Frequency")
% --- Validation data, augmentation, network, and class weights ---
valdir = fullfile(outputFolder, 'val\')
imdsVal = imageDatastore(valdir)
vallabelDir = fullfile(outputFolder,"val_labels\");
pxdsVal = pixelLabelDatastore(vallabelDir,classes,labelIDs);
% Pair each image with its pixel labels for trainnet.
dsTrain = combine(imds, pxds)
dsVal = combine(imdsVal, pxdsVal)
% Random translation range (pixels); augmentation applies to training
% data only — validation data stays untouched.
xTrans = [-10 10];
yTrans = [-10 10];
dsTrain = transform(dsTrain, @(data)augmentImageAndLabel(data,xTrans,yTrans));
imageSize = [720 960 3];
numClasses = numel(classes);
% NOTE(review): this re-creates the same untrained network already built
% above; the earlier instance is discarded.
net = deeplabv3plus(imageSize, numClasses, 'mobilenetv2');
% Median-frequency class balancing: weight each class by the median
% image frequency divided by its own frequency, countering imbalance.
imageFreq = tbl.PixelCount ./ tbl.ImagePixelCount;
classWeights = median(imageFreq) ./ imageFreq;
function iou = computeIoU(Y, T)
% Intersection-over-union metric for use with trainnet's Metrics option.
%
% trainnet calls a custom metric function as metric(Y,T), where Y is the
% network PREDICTION and T is the TARGET. The original code named the
% first argument YTrue and thresholded the second, i.e. it binarized the
% ground truth instead of the predictions. Fixed: threshold Y.
%
% Inputs:
%   Y - predicted scores/probabilities.
%   T - ground-truth targets (binary / one-hot).
% Output:
%   iou - scalar intersection-over-union over all elements.
% Binarize the predictions (assuming a single class for simplicity).
Y = Y > 0.5;
intersection = sum((Y & T), 'all');
union = sum((Y | T), 'all');
if union == 0
    iou = 1; % No positives in either prediction or target: perfect match.
else
    iou = intersection / union;
end
end
% --- Training configuration and training run ---
% SGDM with piecewise schedule: learning rate drops by 10x every 6
% epochs from 1e-2; early stop after 4 stalled validation evaluations.
% NOTE(review): computeIoU is a single-class IoU applied to multi-class
% one-hot outputs here — verify the metric values are meaningful.
options = trainingOptions("sgdm", ...
LearnRateSchedule="piecewise", ...
LearnRateDropPeriod=6, ...
LearnRateDropFactor=0.1, ...
Momentum=0.9, ...
InitialLearnRate=1e-2, ...
L2Regularization=0.005, ...
ValidationData=dsVal, ...
MaxEpochs=18, ...
MiniBatchSize=4, ...
Shuffle="every-epoch", ...
CheckpointPath="C:\Anwar\CamVid\", ...
VerboseFrequency=10, ...
ValidationPatience=4, ...
Metrics = { "accuracy", @computeIoU }, ...
Plots="training-progress"); % Enable training graphs
doTraining = true;
if doTraining
% Custom class-weighted loss; classWeights comes from the per-class
% pixel statistics computed above.
[net,info] = trainnet(dsTrain,net,@(Y,T) modelLoss(Y,T,classWeights),options);
end
% --- Evaluate the trained network on the held-out test set ---
testimgDir = fullfile(outputFolder,'test\');
imdsTest = imageDatastore(testimgDir);
testlabelDir = fullfile(outputFolder,"test_labels\");
pxdsTest = pixelLabelDatastore(testlabelDir,classes,labelIDs);
% Qualitative check on a single test image.
I = readimage(imdsTest,35);
C = semanticseg(I,net,Classes=classes);
B = labeloverlay(I,C,Colormap=cmap,Transparency=0.4);
imshow(B)
pixelLabelColorbar(cmap, classes);
% Side-by-side comparison of prediction vs. ground truth, both cast to
% uint8 label indices so imshowpair can highlight differences.
expectedResult = readimage(pxdsTest,35);
actual = uint8(C);
expected = uint8(expectedResult);
imshowpair(actual, expected)
% Per-class IoU (Jaccard index) for this single image.
iou = jaccard(C,expectedResult);
table(classes,iou)
% Segment the whole test set (results written to tempdir) and compute
% dataset- and class-level accuracy/IoU metrics.
pxdsResults = semanticseg(imdsTest,net, ...
Classes=classes, ...
MiniBatchSize=4, ...
WriteLocation=tempdir, ...
Verbose=false);
metrics = evaluateSemanticSegmentation(pxdsResults,pxdsTest,Verbose=false);
metrics.DataSetMetrics
metrics.ClassMetrics
% Combined datastore consumed by the manual mIoU computation below.
testData = combine(imdsTest, pxdsTest)
function meanIoU = computeTestmIoU(net, testData, classes)
% Compute the mean intersection-over-union of NET over TESTDATA,
% accumulating per-class intersections and unions across the whole set.
%
% Inputs:
%   net      - trained semantic segmentation network.
%   testData - combined datastore yielding {image, ground-truth mask}.
%   classes  - string/cellstr array of class names.
% Output:
%   meanIoU  - mean of per-class IoU values (NaN classes omitted).
numClasses = numel(classes);
intersection = zeros(numClasses, 1);
union = zeros(numClasses, 1);
% Reset the datastore before looping.
reset(testData);
while hasdata(testData)
    % Read one observation: {image, ground-truth categorical mask}.
    data = read(testData);
    img = data{1};
    trueMask = data{2};
    % Pass Classes so the predicted categories carry the CamVid class
    % names; without it semanticseg defaults to C1..CN and the ==
    % comparisons below would never match any class name.
    predMask = semanticseg(img, net, Classes=classes);
    for c = 1:numClasses
        % FIX: the original referenced the undefined variable
        % "classNames"; the parameter is named "classes".
        predClass = (predMask == classes(c));
        trueClass = (trueMask == classes(c));
        intersection(c) = intersection(c) + sum(predClass(:) & trueClass(:));
        union(c) = union(c) + sum(predClass(:) | trueClass(:));
    end
end
% Per-class IoU; eps avoids division by zero for classes absent from
% both predictions and ground truth.
IoU = intersection ./ (union + eps);
meanIoU = mean(IoU, 'omitnan');
fprintf('Overall Test mIoU: %.4f\n', meanIoU);
end
% Report the manually accumulated mean IoU over the full test set.
testmIoU = computeTestmIoU(net, testData, classes);