gabri14el committed on
Commit
e6b3e35
1 Parent(s): ab6a9fe

Upload with huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +16 -0
  2. .gitignore +3 -0
  3. Dissertação/Acessorios/MyAppendixA.tex +17 -0
  4. Dissertação/Acessorios/MyMacroDefinitions.tex +115 -0
  5. Dissertação/Acessorios/UTAD.JPG +0 -0
  6. Dissertação/Acessorios/datasheet.pdf +3 -0
  7. Dissertação/Cap0/MyAbstract.aux +57 -0
  8. Dissertação/Cap0/MyAbstract.tex +28 -0
  9. Dissertação/Cap0/MyAgradecimentos.aux +58 -0
  10. Dissertação/Cap0/MyAgradecimentos.tex +15 -0
  11. Dissertação/Cap0/MyGlossario.aux +62 -0
  12. Dissertação/Cap0/MyGlossario.tex +68 -0
  13. Dissertação/Cap0/MyResumo.aux +57 -0
  14. Dissertação/Cap0/MyResumo.tex +30 -0
  15. Dissertação/Cap1/CAP1.aux +92 -0
  16. Dissertação/Cap1/CAP1.tex +97 -0
  17. Dissertação/Cap1/Figure.jpg +0 -0
  18. Dissertação/Cap1/wine-consuption-over-the-years.png +0 -0
  19. Dissertação/Cap1/wine-production-over-the-years.png +0 -0
  20. Dissertação/Cap2/CAP2.aux +126 -0
  21. Dissertação/Cap2/CAP2.tex +60 -0
  22. Dissertação/Cap2/classification.tex +180 -0
  23. Dissertação/Cap2/classification/cm-5-6.png +0 -0
  24. Dissertação/Cap2/classification/cm-5-7-8-9-10.png +0 -0
  25. Dissertação/Cap2/classification/cm-5-7.png +0 -0
  26. Dissertação/Cap2/classification/xai-architectures.png +3 -0
  27. Dissertação/Cap2/classification/xai-baseline.png +3 -0
  28. Dissertação/Cap2/classification/xai-focalloss.png +3 -0
  29. Dissertação/Cap2/classification/xai-segmentation.png +3 -0
  30. Dissertação/Cap2/grad_cam.tex +17 -0
  31. Dissertação/Cap2/res-finetuning-impact-gradcam.png +0 -0
  32. Dissertação/Cap2/res-finetuning-impact-gradcam2.png +0 -0
  33. Dissertação/Cap2/res-finetuning-impact.png +0 -0
  34. Dissertação/Cap2/segmentation.tex +58 -0
  35. Dissertação/Cap2/segmentation/segmentation-case1.jpg +0 -0
  36. Dissertação/Cap2/segmentation/segmentation-case2.jpg +0 -0
  37. Dissertação/Cap2/segmentation/segmentation-case3.jpg +0 -0
  38. Dissertação/Cap3/CAP3.aux +119 -0
  39. Dissertação/Cap3/CAP3.tex +329 -0
  40. Dissertação/Cap3/cds5-image-distribution.png +0 -0
  41. Dissertação/Cap3/comparacao_tempo.jpg +0 -0
  42. Dissertação/Cap3/distribuition.png +0 -0
  43. Dissertação/Cap3/distribution-ds1.png +0 -0
  44. Dissertação/Cap3/distribution-ds3.png +0 -0
  45. Dissertação/Cap3/ds3-examples.jpg +0 -0
  46. Dissertação/Cap3/example_ds12.jpg +0 -0
  47. Dissertação/Cap3/fluxograma geral.jpg +0 -0
  48. Dissertação/Cap3/mascara.jpg +0 -0
  49. Dissertação/Cap3/segmentation-example.jpg +0 -0
  50. Dissertação/Cap3/segmented-images.jpg +0 -0
.gitattributes CHANGED
@@ -32,3 +32,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Dissertação/dissertação.pdf filter=lfs diff=lfs merge=lfs -text
+ Dissertação/Acessorios/datasheet.pdf filter=lfs diff=lfs merge=lfs -text
+ Dissertação/Cap2/classification/xai-architectures.png filter=lfs diff=lfs merge=lfs -text
+ Dissertação/Cap2/classification/xai-baseline.png filter=lfs diff=lfs merge=lfs -text
+ Dissertação/Cap2/classification/xai-focalloss.png filter=lfs diff=lfs merge=lfs -text
+ Dissertação/Cap2/classification/xai-segmentation.png filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]10/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]5/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]6/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]7/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]8/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/classificacao/Experimento[[:space:]]9/EAI.ipynb filter=lfs diff=lfs merge=lfs -text
+ experimentos/fine-tuning/Experimento[[:space:]]1/model_checkpoint_weights_callback.weights.best.hdf5 filter=lfs diff=lfs merge=lfs -text
+ experimentos/fine-tuning/Experimento[[:space:]]2/exp[[:space:]]2/exp2.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+ experimentos/fine-tuning/Experimento[[:space:]]3/exp[[:space:]]3/exp3.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+ experimentos/fine-tuning/Experimento[[:space:]]4/exp[[:space:]]4/exp[[:space:]]4.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ build/
+ dist/
+ __pycache__/
Dissertação/Acessorios/MyAppendixA.tex ADDED
@@ -0,0 +1,17 @@
+ % ------------------------------------------------------------------
+ % Parâmetros do Processo AMI C07MA
+ % ------------------------------------------------------------------
+
+ \chapter{Apêndice}
+
+
+ \section*{Datasheets}
+
+
+
+ \subsection{DS3231}
+ \newcommand*\side{90}
+
+ \includepdf[pages=-,width=\textwidth,pagecommand={\xdef\side{\the\numexpr-\side\relax}}]{Acessorios/datasheet.pdf}
+
+ % ---------------------------------------------------------------------
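A note on the \side idiom above: pagecommand is executed for every page that \includepdf inserts, and \xdef\side{\the\numexpr-\side\relax} overwrites \side with its negation, so the macro alternates 90, -90, 90, ... across the inserted pages. It is presumably consumed elsewhere in the template (e.g. as a rotation angle); it is not read by this \includepdf call itself. A minimal self-contained sketch of the sign flip, independent of pdfpages:

    \documentclass{article}
    \begin{document}
    \newcommand*\side{90}
    Initial value: \side.
    % \numexpr negates the current value; \xdef saves it back globally.
    \xdef\side{\the\numexpr-\side\relax}
    After one flip: \side.
    \xdef\side{\the\numexpr-\side\relax}
    After two flips: \side.
    \end{document}

Compiled, this prints "Initial value: 90. After one flip: -90. After two flips: 90."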
Dissertação/Acessorios/MyMacroDefinitions.tex ADDED
@@ -0,0 +1,115 @@
+ %-------------------------------------------------------------------------
+ % Outros Termos e Comandos
+ %-------------------------------------------------------------------------
+ \newcommand{\LDC}{L_{\mathrm{DC}}}
+ \newcommand{\QL}{Q_{\mathrm{L}}}
+ \newcommand{\KVCO}{\mathrm{K}_{\mathrm{VCO}}}
+ \newcommand{\KPFD}{\mathrm{K}_{\phi}}
+ \newcommand{\UP}{\mathsf{UP}}
+ \newcommand{\DOWN}{\mathsf{DOWN}}
+ \newcommand{\NAND}{\mathsf{NAND}}
+
+ %\newcommand{\mi}[1]{#1\index{#1}} % MARCA ENTRADA PARA O ÍNDICE REMISSIVO
+
+ %-------------------------------------------------------------------------
+ % Parâmetros MOSFET
+ %-------------------------------------------------------------------------
+ \newcommand{\GM}{g_{\mathrm{m}}}
+ \newcommand{\EOX}{\varepsilon_{\mathrm{OX}}}
+ \newcommand{\TOX}{\mathrm{t}_{\mathrm{OX}}}
+ \newcommand{\MOBILITYN}{\mu_{\mathrm{n}}}
+ \newcommand{\MOBILITYP}{\mu_{\mathrm{p}}}
+ \newcommand{\KPN}{\mathrm{KP}_{\mathrm{n}}}
+ \newcommand{\KPP}{\mathrm{KP}_{\mathrm{p}}}
+ \newcommand{\CDB}{C_{\mathrm{db}}}
+
+ %-------------------------------------------------------------------------
+ % Capacidades
+ %-------------------------------------------------------------------------
+ \newcommand{\COX}{C_{\mathrm{ox}}}
+ \newcommand{\CS}{C_{\mathrm{S}}}
+ \newcommand{\CB}{C_{\mathrm{B}}}
+
+ \newcommand{\RMS}{\mathrm{rms}}
+ \newcommand{\BW}{\mathrm{BW}}
+ \newcommand{\DS}{$\Delta\Sigma$ }
+ \newcommand{\MAX}{\mathrm{max}}
+ \newcommand{\SNRMAX}{\mathrm{SNR}_{\mathrm{max}}}
+ \newcommand{\SLEW}{\textit{slew-rate}}
+ %-------------------------------------------------------------------------
+ % Tempo e Frequência
+ %-------------------------------------------------------------------------
+ \newcommand{\TCLK}{T_{\mathrm{CLK}}}
+ \newcommand{\TN}{T_{\mathrm{N}}}
+ \newcommand{\TS}{T_{\mathrm{s}}}
+ \newcommand{\FS}{f_{\mathrm{s}}}
+ \newcommand{\FB}{f_{\mathrm{B}}}
+ \newcommand{\FN}{f_{\mathrm{N}}}
+ \newcommand{\FREF}{f_{\mathrm{REF}}}
+ \newcommand{\FDIV}{f_{\mathrm{DIV}}}
+ \newcommand{\FOUT}{f_{\mathrm{OUT}}}
+ \newcommand{\fout}{f_{\mathrm{out}}}
+ \newcommand{\fc}{f_{\mathrm{c}}}
+ \newcommand{\kbps}{\mathrm{kbps}}
+
+ %-------------------------------------------------------------------------
+ % Controlo de Espaços
+ %-------------------------------------------------------------------------
+ \newcommand{\EspacoPequeno}{\vskip2mm}
+ \newcommand{\EspacoMedio}{\vskip4mm}
+ \newcommand{\EspacoGrande}{\vskip8mm}
+ \newcommand{\EspacoExtra}{\vskip10mm}
+
+ %-------------------------------------------------------------------------
+ % Tensões
+ %-------------------------------------------------------------------------
+ \newcommand{\VOUT}{v_{\mathrm{out}}}
+ \newcommand{\VOUTMAX}{v_{\mathrm{out,max}}}
+ \newcommand{\VIN}{v_{\mathrm{in}}}
+ \newcommand{\VFB}{v_{\mathrm{FB}}}
+ \newcommand{\VFS}{V_{\mathrm{FS}}}
+ \newcommand{\VGS}{V_{\mathrm{GS}}}
+ \newcommand{\VP}{V_{\mathrm{P}}}
+ \newcommand{\VDS}{V_{\mathrm{DS}}}
+ \newcommand{\VDSON}{V_{\mathrm{DS,ON}}}
+ \newcommand{\vDSpico}{V_{\mathrm{DS,ON}}}
+ \newcommand{\VDD}{V_{\mathrm{DD}}}
+ \newcommand{\VSW}{v_{\mathrm{SW}}}
+ \newcommand{\VSWON}{V_{\mathrm{SW,ON}}}
+ \newcommand{\VCO}{\mathrm{VCO_{\mathrm{control}}}}
+ \newcommand{\VCP}{V_{\mathrm{CP}}}
+ \newcommand{\VRF}{V_{\mathrm{RF}}}
+ \newcommand{\GND}{\mathrm{GND}}
+
+ \newcommand{\VTN}{V_{\mathrm{TN}}}
+ \newcommand{\VTP}{V_{\mathrm{TP}}}
+ \newcommand{\VEFF}{V_{\mathrm{eff}}}
+ %-------------------------------------------------------------------------
+ % Correntes
+ %-------------------------------------------------------------------------
+ \newcommand{\ID}{I_{\mathrm{D}}}
+ \newcommand{\iD}{i_{\mathrm{D}}}
+ \newcommand{\iDpico}{i_{\mathrm{D,pico}}}
+ \newcommand{\IDC}{I_{\mathrm{DC}}}
+ \newcommand{\IDSS}{I_{\mathrm{DSS}}}
+ \newcommand{\ISW}{i_{\mathrm{SW}}}
+ \newcommand{\IRFMAX}{i_{\mathrm{RF,max}}}
+ \newcommand{\iRF}{i_{\mathrm{RF}}}
+ \newcommand{\IUP}{I_{\mathrm{UP}}}
+ \newcommand{\IDOWN}{I_{\mathrm{DOWN}}}
+ \newcommand{\IPUMP}{I_{\mathrm{pump}}}
+ \newcommand{\ICP}{I_{\mathrm{CP}}}
+ %-------------------------------------------------------------------------
+ % Potências
+ %-------------------------------------------------------------------------
+ \newcommand{\POUT}{P_{\mathrm{out}}}
+ \newcommand{\POUTMAX}{P_{\mathrm{out,max}}}
+ \newcommand{\PIN}{P_{\mathrm{in}}}
+ \newcommand{\PDC}{P_{\mathrm{DC}}}
+ %-------------------------------------------------------------------------
+ % Resistências
+ %-------------------------------------------------------------------------
+ \newcommand{\RL}{R_{\mathrm{L}}}
+ \newcommand{\ZIN}{Z_{\mathrm{in}}}
+ \newcommand{\ZOUT}{Z_{\mathrm{out}}}
+ \newcommand{\ZL}{Z_{\mathrm{L}}}
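These macros are math-mode shorthands inherited from an electronics dissertation template (PLL/MOSFET notation), which is why most of them never appear in this thesis. A minimal usage sketch, assuming the file above is reachable on TeX's input path (the \VEFF identity below is standard MOSFET notation, not something defined in this repository):

    \documentclass{article}
    % Hypothetical path; adjust to wherever MyMacroDefinitions.tex lives.
    \input{Acessorios/MyMacroDefinitions}
    \begin{document}
    % Each macro expands to decorated math, e.g. \VGS -> V_{\mathrm{GS}},
    % so they are meant to be used inside math mode:
    The MOSFET overdrive voltage is $\VEFF = \VGS - \VTN$.
    \end{document}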
Dissertação/Acessorios/UTAD.JPG ADDED
Dissertação/Acessorios/datasheet.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d2590d00bd44a1e5df767e64521a35b7165a0845bcb2fac99c8a2f555e0bb756
+ size 1272082
Dissertação/Cap0/MyAbstract.aux ADDED
@@ -0,0 +1,57 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \@setckpt{Cap0/MyAbstract}{
+ \setcounter{page}{15}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{0}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{0}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{0}
+ \setcounter{section}{0}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{0}
+ \setcounter{table}{0}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{0}
+ \setcounter{LT@chunks}{0}
+ \setcounter{tcbbreakpart}{0}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{0}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{0}
+ \setcounter{Hfootnote}{0}
+ \setcounter{bookmark@seq@number}{2}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{0}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap0/MyAbstract.tex ADDED
@@ -0,0 +1,28 @@
+ %-----------------------------------------------------------------------------------------------------------------
+ % Abstract - Resumo em Inglês
+ %-----------------------------------------------------------------------------------------------------------------
+
+ \begin{center}
+ \large{Automatic Grapevine Variety Classification using Deep Learning}
+
+ \vskip5mm
+ \normalsize{\textit{Gabriel Antonio Pereira dos Santos Carneiro}}
+
+ \vskip5mm
+ \small{Submitted to the University of Trás-os-Montes and Alto Douro \\
+ in partial fulfillment of the requirements for the degree of \\
+ Master in Computer Science and Engineering}
+ \end{center}
+
+ \textbf{Abstract ---}
+ Wine is one of the most important products of Portugal, and the grapevine variety is decisive in ensuring its uniqueness, authenticity and classification. In the Douro Demarcated Region, only certain grapevine varieties are allowed, implying the need for an identification mechanism.
+
+ Ampelography remains one of the most accurate ways to identify grapevine species. However, ampelographers, the professionals who classify grapevines through visual analysis of their phenotypic characteristics, are disappearing. In this situation, deep learning models are one possible answer to the scarcity of ampelographers.
+
+ This study presents the use of deep learning models for grapevine species identification from images acquired in-field. Ten experiments were conducted in which models pre-trained on ImageNet were fine-tuned. These experiments analyzed: the impact of different fine-tuning configurations; the use of rough segmentation as a pre-processing tool; the changes introduced by the Focal Loss; and the Xception, ResNet-101, MobileNetV2 and EfficientNet architectures. The F1 score and accuracy metrics, together with the Explainable Artificial Intelligence approaches Grad-CAM, Grad-CAM++ and LIME, were applied to evaluate the results.
+
+ As a result, EfficientNet was the best architecture in terms of computational cost and metrics, achieving an accuracy and F1 score of 0.94 and outperforming the state-of-the-art accuracy by 16.7\%. Furthermore, it was found that: the choice of the layers trained during fine-tuning directly affects the features the model uses to make decisions; the Focal Loss decreases the model's performance, although the model then looks at fewer background pixels when deciding; using the segmented dataset also decreased the model's performance, while likewise reducing the background pixels used in decisions; and, in this application, LIME was appropriate for comparing different architectures, while Grad-CAM was better suited to comparing different hyper-parameters within the same architecture.
+
+ \textbf{Key Words:} deep learning, grapevine species identification, segmentation, explainable artificial intelligence
+
+ %-----------------------------------------------------------------------------------------------------------------
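For reference, the accuracy and F1 score quoted in the abstract follow their standard definitions (this excerpt does not restate them); with TP, TN, FP and FN denoting confusion-matrix counts:

    \begin{equation*}
      \mathrm{Accuracy} = \frac{TP + TN}{TP + TN + FP + FN}, \qquad
      \mathrm{F1} = 2 \cdot \frac{\mathrm{Precision} \cdot \mathrm{Recall}}
                               {\mathrm{Precision} + \mathrm{Recall}}
    \end{equation*}

where Precision = TP/(TP + FP) and Recall = TP/(TP + FN); in the multi-class setting reported here, per-class scores are typically averaged (macro or weighted).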
Dissertação/Cap0/MyAgradecimentos.aux ADDED
@@ -0,0 +1,58 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \@writefile{toc}{\contentsline {chapter}{Agradecimentos}{xv}{chapter*.4}\protected@file@percent }
+ \@setckpt{Cap0/MyAgradecimentos}{
+ \setcounter{page}{16}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{0}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{0}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{0}
+ \setcounter{section}{0}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{0}
+ \setcounter{table}{0}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{0}
+ \setcounter{LT@chunks}{0}
+ \setcounter{tcbbreakpart}{0}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{0}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{0}
+ \setcounter{Hfootnote}{0}
+ \setcounter{bookmark@seq@number}{3}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{0}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap0/MyAgradecimentos.tex ADDED
@@ -0,0 +1,15 @@
+ %-----------------------------------------------------------------------------------------------------------------
+ \prefacesection{Agradecimentos}
+ %-----------------------------------------------------------------------------------------------------------------
+ Agradeço à minha mãe, Maria da Conceição Pereira dos Santos, por ter me dado vida e todo o apoio para a elaboração deste estudo.
+
+ Como brasileiro nascido no século 20, agradeço a Deus.
+
+ E, por último, meu muito obrigado aos meus orientadores António e Joaquim e ao meu ex-coordenador de bolsa, Roberto, por possibilitarem que eu conseguisse alcançar tal feito, disponibilizando conselhos, ensinamentos e compreensão.
+
+
+ \EspacoMedio
+ \EspacoMedio
+ \EspacoMedio
+ \noindent UTAD, \hfill Gabriel Antonio Pereira dos Santos Carneiro \\ Vila Real, 05 of October of 2021
+ %-----------------------------------------------------------------------------------------------------------------
Dissertação/Cap0/MyGlossario.aux ADDED
@@ -0,0 +1,62 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \gdef \LT@i {\LT@entry 
+ {1}{97.35826pt}\LT@entry 
+ {1}{324.9803pt}}
+ \@writefile{toc}{\contentsline {chapter}{List of abbreviations}{xxi}{chapter*.10}\protected@file@percent }
+ \gdef \FBLTpage@i {\gdef\flrow@LTlastpage{2}}
+ \@setckpt{Cap0/MyGlossario}{
+ \setcounter{page}{23}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{0}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{0}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{0}
+ \setcounter{section}{0}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{0}
+ \setcounter{table}{1}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{1}
+ \setcounter{LT@chunks}{5}
+ \setcounter{tcbbreakpart}{0}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{0}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{0}
+ \setcounter{Hfootnote}{0}
+ \setcounter{bookmark@seq@number}{7}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{3}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap0/MyGlossario.tex ADDED
@@ -0,0 +1,68 @@
+ %-----------------------------------------------------------------------------------------------------------------
+ % Glossário de termos, lista de acrónimos e lista de abreviaturas
+ %-----------------------------------------------------------------------------------------------------------------
+ \prefacesection{List of abbreviations}
+ {
+ \setlinespacing{1.33}
+ \def\baselinestretch{1.15}
+ \renewcommand{\arraystretch}{1.1}
+ \setlength{\arrayrulewidth}{0.2mm}%
+ %-----------------------------------------------------------------------------------------------------------------
+ %\section*{Glossário de termos}
+ %-----------------------------------------------------------------------------------------------------------------
+ %\vskip10mm
+
+ %-----------------------------------------------------------------------------------------------------------------
+ %\section*{List of abbreviations}
+ %-----------------------------------------------------------------------------------------------------------------
+
+ \begin{longtable}[c]{p{3cm} p{11cm}}
+ \textbf{Abbreviation} & \textbf{Expansion} \\
+ \endfirsthead
+ \textbf{Abbreviation} & \textbf{Expansion} \\
+ \endhead
+ \endfoot
+ \endlastfoot\\
+ AI & Artificial Intelligence \\
+ CD & Códega \\
+ CM & Confusion Matrix \\
+ CNN & Convolutional Neural Network \\
+ DDR & Douro Demarcated Region \\
+ DL & Deep Learning \\
+ FPS & Frames per Second \\
+ Grad-CAM & Gradient-weighted Class Activation Mapping\\
+ HSV & Hue, Saturation, Value \\
+ km & Kilometers \\
+ LIME & Local Interpretable Model-Agnostic Explanations \\
+ LR & Learning Rate \\
+ MF & Malvasia Fina \\
+ MG & Moscatel Galego \\
+ ML & Machine Learning \\
+ MP & Malvasia Preta \\
+ MR & Malvasia Rei \\
+ MT & Mourisco Tinto \\
+ NN & Neural Network \\
+ RAM & Random Access Memory \\
+ ReLU & Rectified Linear Units \\
+ RG & Rabigato \\
+ RGB & Red, Green and Blue \\
+ TA & Tinta Amarela \\
+ TB & Tinta Barroca \\
+ TC & Tinto Cão \\
+ TN & Touriga Nacional \\
+ TR & Tinta Roriz \\
+ XAI & Explainable Artificial Intelligence \\
+ %RGB & \emph{Direct Current} (corrente contínua) \\
+
+ %MOSFET & \textit {Metal-Oxide-Semiconductor Field-Effect Transistor} \\
+
+ \end{longtable}
+
+ \vskip20mm
+ %-----------------------------------------------------------------------------------------------------------------
+
+ %-----------------------------------------------------------------------------------------------------------------
+
+
+
+ %-----------------------------------------------------------------------------------------------------------------
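For readers unfamiliar with the longtable header machinery used above: the rows before \endfirsthead form the header typeset on the table's first page, the rows before \endhead are repeated at the top of every following page, and \endfoot/\endlastfoot delimit the running and final footers (both left empty here). A minimal self-contained sketch:

    \documentclass{article}
    \usepackage{longtable}
    \begin{document}
    \begin{longtable}{l l}
    \textbf{Abbreviation} & \textbf{Expansion} \\
    \endfirsthead  % header on the first page only
    \textbf{Abbreviation} & \textbf{Expansion} \\
    \endhead       % header repeated on every later page
    \endfoot       % empty running footer
    \endlastfoot   % empty footer on the final page
    AI & Artificial Intelligence \\
    DL & Deep Learning \\
    \end{longtable}
    \end{document}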
Dissertação/Cap0/MyResumo.aux ADDED
@@ -0,0 +1,57 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \@setckpt{Cap0/MyResumo}{
+ \setcounter{page}{13}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{0}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{0}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{0}
+ \setcounter{section}{0}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{0}
+ \setcounter{table}{0}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{0}
+ \setcounter{LT@chunks}{0}
+ \setcounter{tcbbreakpart}{0}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{0}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{0}
+ \setcounter{Hfootnote}{0}
+ \setcounter{bookmark@seq@number}{1}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{0}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap0/MyResumo.tex ADDED
@@ -0,0 +1,30 @@
+ %-----------------------------------------------------------------------------------------------------------------
+ % Resumo em Português
+ %-----------------------------------------------------------------------------------------------------------------
+
+
+ \begin{center}
+ \large{Classificação Automática de Castas de Uva utilizando Deep Learning}
+
+ \vskip5mm
+ \normalsize{\textit{Gabriel Antonio Pereira dos Santos Carneiro}}
+
+
+ \vskip5mm
+ \small{Submetido na Universidade de Trás-os-Montes e Alto Douro \\
+ para o preenchimento dos requisitos parciais para obtenção do grau de \\
+ Mestre em Engenharia Informática}
+ \end{center}
+
+ \textbf{Resumo ---}
+ O vinho é um dos produtos mais importantes produzidos em Portugal, sendo a casta da uva um fator decisivo para garantir a singularidade, autenticidade e qualidade do produto. Na Região Demarcada do Douro, somente algumas castas são permitidas, implicando a necessidade de um mecanismo de identificação.
+
+ A ampelografia é uma das formas mais acuradas de identificar castas de uva. Entretanto, os ampelógrafos, profissionais que usam análise visual das características fenotípicas das vinhas para classificar as castas, estão desaparecendo. Perante esta situação, os métodos baseados em \textit{Deep Learning} para identificação automática de castas a partir de imagens se tornam uma alternativa para lidar com a escassez de tais profissionais.
+
+ Nesta dissertação é apresentado um estudo da utilização de modelos de \textit{Deep Learning} para identificação automática de castas de uva a partir de imagens adquiridas no campo. Dez experimentos foram conduzidos, de maneira que modelos pré-treinados no ImageNet foram retreinados para tal tarefa. Nesses experimentos foram analisados: o impacto de diferentes configurações de \textit{fine-tuning}; a utilização de segmentação rudimentar como pré-processamento; as modificações trazidas pelo uso da Focal Loss; e o uso das arquiteturas pré-treinadas Xception, ResNet-101, MobileNetV2 e EfficientNet. Para avaliar os resultados, as métricas Escore F1 e Acurácia foram utilizadas, além das ferramentas de \textit{Explainable Artificial Intelligence} LIME, Grad-CAM e Grad-CAM++.
+
+ Como resultado, a EfficientNet foi a melhor arquitetura em termos de custo computacional e métricas, atingindo Escore F1 e acurácia de 0.94, superando a acurácia do estado-da-arte em 16.7\%. Além disso, foi possível constatar que: a escolha das camadas treinadas no \textit{fine-tuning} afeta diretamente as regiões das imagens que os modelos usam para tomar decisões; o uso da Focal Loss diminui a performance do modelo em termos de métricas, mas leva-o a considerar regiões mais importantes para tomar decisões; segmentar o dataset antes da classificação diminui a performance, entretanto faz com que menos \textit{background} seja levado em consideração quando o modelo toma decisões; e, nesta aplicação, o LIME gerou explicações mais apropriadas para comparar diferentes arquiteturas, enquanto as do Grad-CAM/Grad-CAM++ foram mais úteis na comparação de diferentes hiperparâmetros no treinamento de uma mesma arquitetura.
+
+ \textbf{Palavras Chave:} \textit{deep learning}, identificação de castas, segmentação, \textit{explainable artificial intelligence}
+
+ %-----------------------------------------------------------------------------------------------------------------
Dissertação/Cap1/CAP1.aux ADDED
@@ -0,0 +1,92 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \citation{Giacosa2019}
+ \citation{TheInternationalOrganisationofVineandWine2018}
+ \citation{TheInternationalOrganisationofVineandWine2018}
+ \citation{TheInternationalOrganisationofVineandWine2018}
+ \citation{TheInternationalOrganisationofVineandWine2018}
+ \citation{Caldeira2018}
+ \citation{Valverde2021}
+ \@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}\protected@file@percent }
+ \@writefile{lof}{\addvspace {10\p@ }}
+ \@writefile{lot}{\addvspace {10\p@ }}
+ \newlabel{Ch:Introducao}{{1}{1}{Introduction}{chapter.1}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Evolution of world wine consumption in 2020. Source: \cite {TheInternationalOrganisationofVineandWine2018}}}{1}{figure.1.1}\protected@file@percent }
+ \newlabel{fig:wine-consuption}{{1.1}{1}{Evolution of world wine consumption in 2020. Source: \cite {TheInternationalOrganisationofVineandWine2018}}{figure.1.1}{}}
+ \citation{MONCAYO2016185}
+ \citation{Chitwood2014}
+ \citation{galet1971precis}
+ \citation{ampelografia,Pavek2003}
+ \citation{Chitwood2014}
+ \citation{galet1971precis}
+ \@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces Evolution of world wine production in 2020. Source: \cite {TheInternationalOrganisationofVineandWine2018}}}{2}{figure.1.2}\protected@file@percent }
+ \newlabel{fig:wine-production}{{1.2}{2}{Evolution of world wine production in 2020. Source: \cite {TheInternationalOrganisationofVineandWine2018}}{figure.1.2}{}}
+ \@writefile{toc}{\contentsline {section}{\numberline {1.1}Motivation}{2}{section.1.1}\protected@file@percent }
+ \citation{ampelografia,tassie2010vine}
+ \citation{P2004}
+ \citation{P2004}
+ \citation{Garcia-Garcia2017,Calo1996,Cunha2009}
+ \citation{deng2009imagenet}
+ \citation{DBLP:journals/corr/LinMBHPRDZ14}
+ \citation{Xiong2020}
+ \citation{Neto2006}
+ \citation{BarredoArrieta2020}
+ \@writefile{toc}{\contentsline {section}{\numberline {1.2}Objectives}{4}{section.1.2}\protected@file@percent }
+ \@writefile{toc}{\contentsline {section}{\numberline {1.3}Contributions}{5}{section.1.3}\protected@file@percent }
+ \@writefile{toc}{\contentsline {section}{\numberline {1.4}Publications}{5}{section.1.4}\protected@file@percent }
+ \@writefile{toc}{\contentsline {section}{\numberline {1.5}Document Structure}{6}{section.1.5}\protected@file@percent }
+ \@setckpt{Cap1/CAP1}{
+ \setcounter{page}{7}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{3}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{2}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{1}
+ \setcounter{section}{5}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{2}
+ \setcounter{table}{0}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{1}
+ \setcounter{LT@chunks}{5}
+ \setcounter{tcbbreakpart}{1}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{1}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{7}
+ \setcounter{Hfootnote}{2}
+ \setcounter{bookmark@seq@number}{13}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{3}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap1/CAP1.tex ADDED
@@ -0,0 +1,97 @@
+ %----------------------------------------------------------
+
+ %-------------------------------------------------------
+ \chapter{Introduction}
+ \label{Ch:Introducao}
+ %---------------------------------------------------------------------------------------------------------------
+
+ Wine is one of the most popular agro-food products across the globe \citep{Giacosa2019}, with the European Union responsible for 48\% of world consumption (almost 112 million hectoliters) and 63\% of world production (almost 165 million hectoliters) in 2019 \citep{TheInternationalOrganisationofVineandWine2018}. The evolution of world wine consumption and production over the years can be seen in Figures \ref{fig:wine-consuption} and \ref{fig:wine-production}, respectively. In terms of value, the wine market moved almost 29.6 billion euros in 2020, despite the Covid-19 pandemic crisis \citep{TheInternationalOrganisationofVineandWine2018}.
+
+ \begin{figure}[htb!]
+ \centering
+ \includegraphics[width=1\textwidth]{Cap1/wine-consuption-over-the-years.png}
+ \caption{Evolution of world wine consumption in 2020. Source: \cite{TheInternationalOrganisationofVineandWine2018}}
+ \label{fig:wine-consuption}
+ \end{figure}
+
+ \begin{figure}[htb!]
+ \centering
+ \includegraphics[width=1\textwidth]{Cap1/wine-production-over-the-years.png}
+ \caption{Evolution of world wine production in 2020. Source: \cite{TheInternationalOrganisationofVineandWine2018}}
+ \label{fig:wine-production}
+ \end{figure}
+
+
+ In Portugal, the 11\textsuperscript{th} largest producer in the world, the gross value added of the wine industry was 2.3 times higher than the average of other companies \citep{Caldeira2018}, with exports valued at almost 798 million euros \citep{Valverde2021}. Beyond the economy, wine is important to Portuguese culture and society.
+
+ The Douro wine region in Portugal, located around the Douro river valley, is known internationally for its famous port wine. To assure high-quality standards, the wines produced in this region are carefully controlled by the Port and Douro Wines Institute, I.P., whose function is to control, certify, promote and protect the designations of origin \textit{“Porto”} and \textit{“Douro”} in the Douro Demarcated Region (DDR).
+
+
+ \section{Motivation}
+
+ To maintain the wine's quality, uniqueness and exclusivity, only specific grapevine varieties are authorized in the DDR. Grape varieties are among the most relevant factors in the wine production chain \citep{MONCAYO2016185}, as they directly influence the authenticity and classification of the wine. Therefore, identifying the different grapevine varieties is crucial for control activities and quality assurance, as well as for regulating production.
+
+ Ampelography, defined by \cite{Chitwood2014} as "the science of phenotypically distinguishing grapevines", is one of the most accurate ways to identify grape varieties through visual analysis. Its authoritative reference is \textit{Précis d'Ampélographie Pratique} \citep{galet1971precis}; nowadays, however, it relies on well-defined official descriptors applied to plant material of known identity \citep{ampelografia, Pavek2003}. Its concepts were confirmed by \cite{Chitwood2014}, who used molecular genetics to relate the descriptions of \cite{galet1971precis} to the new morphological Elliptical Fourier descriptors and to a generalized Procrustes analysis of leaf venation landmarks.
+
+
+ Like any visual analysis task, ampelography depends on those performing it, which makes the process subjective. Furthermore, it can be exposed to interference from environmental, cultural and genetic conditions, which may introduce uncertainty into the identification process \citep{ampelografia, tassie2010vine}. Finally, like any other human-based task, it can be time-consuming and error-prone, and ampelographers, the professionals who do this work, are scarce \citep{P2004}.
+
+
+ Nowadays, molecular markers are also used in grapevine species identification \citep{P2004}. This approach handles the subjectivity and the environmental influence; however, it must be
+ complemented by ampelography, due to leaf characteristics that can only be evaluated in the field \citep{Garcia-Garcia2017, Calo1996, Cunha2009}. Furthermore, identifying grape species in order to control and regulate production involves several molecular analyses, raising the cost and time required when compared with ampelography.
+
+ Deep Learning (DL) methods are a strong alternative for dealing with the scarcity of ampelographers and the need for molecular analysis. They emerged in 2012 and now represent the state of the art in most image classification challenges, reaching performance equal to or better than that of humans. Several DL classification architectures have been proposed in the literature, and reusing the learning of models pre-trained on bigger generalist datasets, e.g. ImageNet \citep{deng2009imagenet} or Microsoft COCO \citep{DBLP:journals/corr/LinMBHPRDZ14}, has enabled their application to many tasks.
+
+ Despite the notable improvement in the performance of computer vision tasks, many labeled images are required to train DL classification models. Furthermore, to build a system capable of identifying grapevine species as precisely as ampelographers, images acquired in-field should be used to train them. This requirement increases the complexity of the problem, since this type of image contains a variety of information that is not related to the grapevine species classification goal, which can lead DL models to error \citep{Xiong2020}.
+
+ In this situation, segmentation becomes an effective way to separate background regions from grapevine regions in images acquired in-field. It can be applied to the images as a pre-processing tool, making the classification more reliable. %Besides that, an accurate leaf segmentation can be employed in automatic Elliptical Fourier descriptors calculus, e.g. \cite{Neto2006}, or in the Procrustes analysis, which can enrich the available data used by DL-models for the grapevine species identification.
+
+ Explainable Artificial Intelligence approaches can be applied to ensure that models are not using background regions to make decisions. Such techniques provide explanations of a prediction, and these explanations can be used to correct the DL models' deficiencies \citep{BarredoArrieta2020}.
+
+ \section{Objectives}
+
+ This study aims to identify different grapevine species using RGB leaf-centered images acquired in-field.
+
+ The specific objectives are enumerated below:
+ \begin{enumerate}
+ \item Explore different DL classification architectures for grapevine species identification;
+ \item Verify the impact of different fine-tuning configurations on grapevine species classification, aiming to optimize their identification;
+ \item Verify the impact of using segmentation as an image pre-processing tool in grapevine species classification, aiming to decrease the use of unrelated information by the DL models when they make decisions;
+ \item Evaluate and analyze the coherence of the trained models using Explainable Artificial Intelligence (XAI) approaches.
+ \end{enumerate}
+
+
+ \section{Contributions}
+
+ This dissertation presents the following contributions:
+ \begin{itemize}
+ \item A predictive model for grapevine species classification, built upon a comparison between different pre-trained architectures;
+
+ \item An analysis of the impact of fine-tuning on the features that the models look at to make decisions;
+
+ \item A predictive model to segment grapevine leaves from background regions in images acquired in-field;
+
+ \item An analysis of the trained models' predictions using different XAI approaches (LIME, Grad-CAM and Grad-CAM++), summarizing the regions most used by the models to make decisions and enumerating the pros and cons of each approach;
+
+ \item A contribution to the library Image Segmentation Keras\footnote{https://github.com/divamgupta/image-segmentation-keras}, providing control of fine-tuning during model training, control of the size of the encoder and decoder used in the SegNet model, and compatibility with TensorFlow 2.0 and newer versions. The code is available in a git repository\footnote{https://github.com/gabri14el/image-segmentation-keras}.
+
+ \end{itemize}
+
+
+ \section{Publications}
+
+ The following publications were produced during the course of this dissertation:
+
+ \begin{enumerate}
+ \item G. Carneiro, L. Pádua, J. J. Sousa, E. Peres, R. Morais and A. Cunha, "Grapevine Variety Identification Through Grapevine Leaf Images Acquired in Natural Environment", 2021 IEEE International Geoscience and Remote Sensing Symposium IGARSS, 2021, pp. 7055-7058, doi: 10.1109/IGARSS47720.2021.9555141.
+ \item G. Carneiro, A. Ferreira, R. Morais, J. J. Sousa and A. Cunha, "Analyzing the Fine Tuning's Impact in Grapevine Classification", International Conference on ENTERprise Information Systems, CENTERIS, 2021.
+ \item G. Carneiro, R. Magalhães, Alexandre Neto, J. J. Sousa and A. Cunha, "Grapevine Segmentation in RGB Images using Deep Learning", International Conference on ENTERprise Information Systems, CENTERIS, 2021.
+ \end{enumerate}
+
+ \section{Document Structure}
+
+ This document is divided into 7 chapters. Each chapter opens with an introductory note stating its purpose and content.
+
+ Chapter \ref{Ch:DLforCV} describes DL's basic concepts, the training process and some DL architectures for classification and segmentation. Explainable Artificial Intelligence is defined, and the methods Grad-CAM, Grad-CAM++ and LIME are explained, in Chapter \ref{Ch:XAI}. In Chapter \ref{Ch:Literatura}, a literature review on grapevine species identification using DL methods is presented. The study's methods are presented in Chapter \ref{Ch:Methods}, the results in Chapter \ref{Ch:results}, and the conclusions and future work in Chapter \ref{Ch:conclusion}.
+
+
Dissertação/Cap1/Figure.jpg ADDED
Dissertação/Cap1/wine-consuption-over-the-years.png ADDED
Dissertação/Cap1/wine-production-over-the-years.png ADDED
Dissertação/Cap2/CAP2.aux ADDED
@@ -0,0 +1,126 @@
+ \relax
+ \providecommand\hyper@newdestlabel[2]{}
+ \citation{Xie2017}
+ \@writefile{toc}{\contentsline {chapter}{\numberline {6}Results and Discussions}{71}{chapter.6}\protected@file@percent }
+ \@writefile{lof}{\addvspace {10\p@ }}
+ \@writefile{lot}{\addvspace {10\p@ }}
+ \newlabel{Ch:results}{{6}{71}{Results and Discussions}{chapter.6}{}}
+ \@writefile{toc}{\contentsline {section}{\numberline {6.1}Segmentation}{71}{section.6.1}\protected@file@percent }
+ \newlabel{sec:res_segmentation}{{6.1}{71}{Segmentation}{section.6.1}{}}
+ \newlabel{tab:seg_results}{{6.1}{71}{Segmentation}{table.6.1}{}}
+ \citation{DBLP:journals/corr/BadrinarayananK15}
+ \@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces Results of the segmentation experiments conducted in this study.}}{72}{table.6.1}\protected@file@percent }
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Segmentation results for a) SEXP1, b) SEXP2, c) SEXP3 and d) SEXP4. The results show that in situations with a high presence of leaves and a low presence of background, the performance of the models is poor.}}{73}{figure.6.1}\protected@file@percent }
+ \newlabel{fig:res-seg-case1}{{6.1}{73}{Segmentation results for a) SEXP1, b) SEXP2, c) SEXP3 and d) SEXP4. The results show that in situations with a high presence of leaves and a low presence of background, the performance of the models is poor}{figure.6.1}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.2}{\ignorespaces Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 3 samples (a, b and c). The results show that the models perform well when background regions are large and well separated (a and b); however, when the background regions are scattered, the performance can be poor.}}{74}{figure.6.2}\protected@file@percent }
+ \newlabel{fig:res-seg-case2}{{6.2}{74}{Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 3 samples (a, b and c). The results show that the models perform well when background regions are large and well separated (a and b); however, when the background regions are scattered, the performance can be poor}{figure.6.2}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 5 samples (a, b, c, d and e). Smooth backgrounds with colors similar to the leaf regions (a, b and c) are poorly segmented by the models. However, smooth backgrounds with different colors and textures (ground and sky) are well segmented.}}{75}{figure.6.3}\protected@file@percent }
+ \newlabel{fig:res-seg-case3}{{6.3}{75}{Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 5 samples (a, b, c, d and e). Smooth backgrounds with colors similar to the leaf regions (a, b and c) are poorly segmented by the models. However, smooth backgrounds with different colors and textures (ground and sky) are well segmented}{figure.6.3}{}}
+ \@writefile{toc}{\contentsline {section}{\numberline {6.2}Impact of Different Fine-Tuning Configurations}{75}{section.6.2}\protected@file@percent }
+ \newlabel{sec:res_finetuning}{{6.2}{75}{Impact of Different Fine-Tuning Configurations}{section.6.2}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Confusion matrices for the experiments CEXP1, CEXP2, CEXP3 and CEXP4. The changes between CMs are marked with colored squares.}}{76}{figure.6.4}\protected@file@percent }
+ \newlabel{fig:res-finetuning-impact}{{6.4}{76}{Confusion matrices for the experiments CEXP1, CEXP2, CEXP3 and CEXP4. The changes between CMs are marked with colored squares}{figure.6.4}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.5}{\ignorespaces a) Comparison between heatmaps generated by CEXP1 and CEXP2 for the same image; b) Comparison between heatmaps generated by CEXP1 and CEXP3 for two different images.}}{77}{figure.6.5}\protected@file@percent }
+ \newlabel{fig:res-finetuning-impact-gradcam}{{6.5}{77}{a) Comparison between heatmaps generated by CEXP1 and CEXP2 for the same image; b) Comparison between heatmaps generated by CEXP1 and CEXP3 for two different images}{figure.6.5}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Examples of heatmaps and scores generated by CEXP1, CEXP2, CEXP3 and CEXP4 for the same samples.}}{79}{figure.6.6}\protected@file@percent }
+ \newlabel{fig:res-finetuning-impact-gradcam2}{{6.6}{79}{Examples of heatmaps and scores generated by CEXP1, CEXP2, CEXP3 and CEXP4 for the same samples}{figure.6.6}{}}
+ \@writefile{toc}{\contentsline {section}{\numberline {6.3}Different Hyper-Parameters in the Grapevine Species Classification}{80}{section.6.3}\protected@file@percent }
+ \newlabel{sec:res_classification}{{6.3}{80}{Different Hyper-Parameters in the Grapevine Species Classification}{section.6.3}{}}
+ \@writefile{lot}{\contentsline {table}{\numberline {6.2}{\ignorespaces Results of the classification experiments conducted in this study.}}{80}{table.6.2}\protected@file@percent }
+ \newlabel{tab:classification_results}{{6.3}{80}{Different Hyper-Parameters in the Grapevine Species Classification}{table.6.2}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Comparison between the confusion matrices of CEXP5 and CEXP6.}}{81}{figure.6.7}\protected@file@percent }
+ \newlabel{fig:res-clas-case1}{{6.7}{81}{Comparison between the confusion matrices of CEXP5 and CEXP6}{figure.6.7}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.8}{\ignorespaces Comparison between different XAI approaches for four different samples. The explanations were obtained for the ground-truth class: a) MG, b) RG, c) RG and d) RG.}}{82}{figure.6.8}\protected@file@percent }
+ \newlabel{fig:xai-baseline}{{6.8}{82}{Comparison between different XAI approaches for four different samples. The explanations were obtained for the ground-truth class: a) MG, b) RG, c) RG and d) RG}{figure.6.8}{}}
+ \citation{Mukhoti2020}
+ \citation{Mukhoti2020}
+ \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}Impact of Using Focal Loss in the Classification of the Unbalanced Dataset}{83}{subsection.6.3.1}\protected@file@percent }
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.9}{\ignorespaces Comparison between different XAI approaches for three different samples for CEXP5 and CEXP6. The explanations were obtained for the ground-truth class: a) CD, b) TC and c) TN. The results for each sample are outlined in red for CEXP5 and in blue for CEXP6.}}{84}{figure.6.9}\protected@file@percent }
+ \newlabel{fig:xai-focalloss}{{6.9}{84}{Comparison between different XAI approaches for three different samples for CEXP5 and CEXP6. The explanations were obtained for the ground-truth class: a) CD, b) TC and c) TN. The results for each sample are outlined in red for CEXP5 and in blue for CEXP6}{figure.6.9}{}}
+ \citation{Mukhoti2020}
+ \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Segmentation as a Pre-processing Tool}{85}{subsection.6.3.2}\protected@file@percent }
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.10}{\ignorespaces Comparison between the confusion matrices of CEXP5 and CEXP7.}}{86}{figure.6.10}\protected@file@percent }
+ \newlabel{fig:res-clas-case2}{{6.10}{86}{Comparison between the confusion matrices of CEXP5 and CEXP7}{figure.6.10}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.11}{\ignorespaces Comparison between different XAI approaches for three different samples for CEXP5 and CEXP7. The explanations were obtained for the ground-truth class: a) CD, b) CD and c) MG. The results for each sample are outlined in red for CEXP5 and in gray for CEXP7. Since the segmentation hides the background regions with black, the pixels that did not contribute positively to the ground-truth class were colored white in CEXP7's generated heatmaps to give a better view.}}{87}{figure.6.11}\protected@file@percent }
+ \newlabel{fig:xai-segmentation}{{6.11}{87}{Comparison between different XAI approaches for three different samples for CEXP5 and CEXP7. The explanations were obtained for the ground-truth class: a) CD, b) CD and c) MG. The results for each sample are outlined in red for CEXP5 and in gray for CEXP7. Since the segmentation hides the background regions with black, the pixels that did not contribute positively to the ground-truth class were colored white in CEXP7's generated heatmaps to give a better view}{figure.6.11}{}}
+ \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Different Architectures}{88}{subsection.6.3.3}\protected@file@percent }
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.12}{\ignorespaces Comparison between the confusion matrices of CEXP5, CEXP8, CEXP9 and CEXP10.}}{89}{figure.6.12}\protected@file@percent }
+ \newlabel{fig:res-clas-case3}{{6.12}{89}{Comparison between the confusion matrices of CEXP5, CEXP8, CEXP9 and CEXP10}{figure.6.12}{}}
+ \@writefile{lof}{\contentsline {figure}{\numberline {6.13}{\ignorespaces Comparison between different XAI approaches for three different samples (the same as the first three in Fig. \ref {fig:xai-baseline}) for CEXP8, CEXP9 and CEXP10. The explanations were obtained for the ground-truth class: a) MG, b) MG and c) RG. The results for each sample are outlined in purple for CEXP8, in pink for CEXP9 and in brown for CEXP10.}}{90}{figure.6.13}\protected@file@percent }
+ \newlabel{fig:xai-architectures}{{6.13}{90}{Comparison between different XAI approaches for three different samples (the same as the first three in Fig. \ref {fig:xai-baseline}) for CEXP8, CEXP9 and CEXP10. The explanations were obtained for the ground-truth class: a) MG, b) MG and c) RG. The results for each sample are outlined in purple for CEXP8, in pink for CEXP9 and in brown for CEXP10}{figure.6.13}{}}
+ \citation{ADAO2019}
+ \citation{Skrabanek2021}
+ \citation{Pereira2019}
+ \citation{Pereira2019}
+ \@writefile{toc}{\contentsline {section}{\numberline {6.4}Discussion}{91}{section.6.4}\protected@file@percent }
+ \@writefile{toc}{\contentsline {paragraph}{Overall Metrics.}{91}{paragraph*.11}\protected@file@percent }
+ \citation{Skrabanek2021}
+ \citation{ADAO2019}
+ \citation{Pereira2019}
+ \newlabel{tab:overall_result}{{6.4}{92}{Overall Metrics}{paragraph*.11}{}}
+ \@writefile{lot}{\contentsline {table}{\numberline {6.3}{\ignorespaces Comparison between the proposed methods and the state-of-the-art in grapevine species identification using Deep Learning.}}{92}{table.6.3}\protected@file@percent }
+ \@writefile{toc}{\contentsline {paragraph}{Fine-Tuning's Layer Configuration.}{92}{paragraph*.12}\protected@file@percent }
+ \@writefile{toc}{\contentsline {paragraph}{Focal Loss.}{92}{paragraph*.13}\protected@file@percent }
+ \citation{Woo2018}
+ \citation{Woo2018}
+ \citation{Schramowski2020}
+ \citation{Schramowski2020}
+ \@writefile{toc}{\contentsline {paragraph}{Segmentation.}{93}{paragraph*.14}\protected@file@percent }
+ \@writefile{toc}{\contentsline {paragraph}{Different Architectures.}{93}{paragraph*.15}\protected@file@percent }
+ \citation{gradcamplus}
+ \citation{Howard2019}
+ \@setckpt{Cap2/CAP2}{
+ \setcounter{page}{96}
+ \setcounter{equation}{0}
+ \setcounter{enumi}{4}
+ \setcounter{enumii}{0}
+ \setcounter{enumiii}{0}
+ \setcounter{enumiv}{0}
+ \setcounter{footnote}{0}
+ \setcounter{mpfootnote}{0}
+ \setcounter{part}{0}
+ \setcounter{chapter}{6}
+ \setcounter{section}{4}
+ \setcounter{subsection}{0}
+ \setcounter{subsubsection}{0}
+ \setcounter{paragraph}{0}
+ \setcounter{subparagraph}{0}
+ \setcounter{figure}{13}
+ \setcounter{table}{3}
+ \setcounter{parentequation}{0}
+ \setcounter{NAT@ctr}{0}
+ \setcounter{subfigure}{0}
+ \setcounter{lofdepth}{1}
+ \setcounter{subtable}{0}
+ \setcounter{lotdepth}{1}
+ \setcounter{LT@tables}{1}
+ \setcounter{LT@chunks}{5}
+ \setcounter{tcbbreakpart}{1}
+ \setcounter{tcblayer}{0}
+ \setcounter{tcolorbox@number}{6}
+ \setcounter{tcbrastercolumn}{0}
+ \setcounter{tcbrasterrow}{0}
+ \setcounter{tcbrasternum}{0}
+ \setcounter{tcbraster}{0}
+ \setcounter{nlinenum}{0}
+ \setcounter{r@tfl@t}{0}
+ \setcounter{float@type}{16}
+ \setcounter{FBl@b}{0}
+ \setcounter{FRobj}{0}
+ \setcounter{FRsobj}{0}
+ \setcounter{FBcnt}{0}
+ \setcounter{ALG@line}{0}
+ \setcounter{ALG@rem}{0}
+ \setcounter{ALG@nested}{0}
+ \setcounter{ALG@Lnr}{2}
+ \setcounter{ALG@blocknr}{10}
+ \setcounter{ALG@storecount}{0}
+ \setcounter{ALG@tmpcounter}{0}
+ \setcounter{AM@survey}{0}
+ \setcounter{Item}{17}
+ \setcounter{Hfootnote}{7}
+ \setcounter{bookmark@seq@number}{38}
+ \setcounter{code}{0}
+ \setcounter{FBLTpage}{3}
+ \setcounter{section@level}{0}
+ }
Dissertação/Cap2/CAP2.tex ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %-----------------------------------------------------------------------------------------------------------------
2
+ \chapter{Results and Discussions}
3
+ \label{Ch:results}
4
+
5
+ The present chapter describes the results of the experiments conducted to classify grapevine species. Sec. \ref{sec:res_segmentation} presents the results of four experiments aiming to separate grapevine leaf areas from the background using the U-Net and SegNet architectures. The impact of fine-tuning on grapevine species identification is analyzed in Sec. \ref{sec:res_finetuning}, based on four different layer configurations of the Xception model. Sec. \ref{sec:res_classification} presents the impact of the Focal Loss, of segmentation as a pre-processing tool, and of different architectures on grapevine species identification. An analysis of the use of LIME, Grad-CAM and Grad-CAM++ is also presented in that section.
6
+
7
+
8
+ %-------------------------------------------------------------------------------------------------------------
9
+
10
+ \section{Segmentation}\label{sec:res_segmentation}
11
+
12
+ \input{Cap2/segmentation}
13
+
14
+ \section{Impact of Different Fine-Tuning Configurations}\label{sec:res_finetuning}
15
+
16
+ The experiments CEXP1, CEXP2, CEXP3 and CEXP4 all achieved an F1-Score of 0.92 and an accuracy of 0.92. The confusion matrix (CM) for each experiment can be seen in Fig. \ref{fig:res-finetuning-impact}.
17
+
18
+ \begin{figure}[htb!]
19
+ \centering
20
+ \includegraphics[width=0.8\textwidth]{Cap2/res-finetuning-impact.png}
21
+ \caption{Confusion Matrix for the experiments CEXP1, CEXP2, CEXP3 and CEXP4. The changes between CMs are marked with colored squares.}
22
+ \label{fig:res-finetuning-impact}
23
+ \end{figure}
24
+
25
+ Comparing the predictions using Grad-CAM, one can see that applying fine-tuning modifies the obtained results, despite the equality of the metrics.
26
+
27
+ The first case was the change of the predicted class for images of the class MF, highlighted with red squares in Fig. \ref{fig:res-finetuning-impact} (a) and (b). In Fig. \ref{fig:res-finetuning-impact-gradcam} (a), one can see the Grad-CAM heatmaps and scores for both classes. Note that the score is distributed because these classes have the top-2 highest scores in both experiments. Furthermore, the pixels' contribution was mostly inverted, meaning that most of the pixels that contributed to the class TA in CEXP1 contributed to the class CD in CEXP2.
28
+
29
+ \begin{figure}[htb!]
30
+ \centering
31
+ \includegraphics[width=0.7\textwidth]{Cap2/res-finetuning-impact-gradcam.png}
32
+ \caption{a) Comparison between heatmaps generated by CEXP1 and CEXP2 for the same image; b) Comparison between heatmaps generated by CEXP1 and CEXP3 for two different images.}
33
+ \label{fig:res-finetuning-impact-gradcam}
34
+ \end{figure}
35
+
36
+ In CEXP3's CM, one can see that the model decreases the error for the MF class in the predictions (green squares in Fig. \ref{fig:res-finetuning-impact} (a) and (c)). However, it increases the error for the MP class (grey squares in Fig. \ref{fig:res-finetuning-impact} (a) and (c)), keeping the accuracy and F1 score unchanged. In the first image shown in Fig. \ref{fig:res-finetuning-impact-gradcam} (b), one can note that applying fine-tuning to blocks 13-14 increases the pixels' contribution, leading the model to consider a larger region, although in the second sample, in Fig. \ref{fig:res-finetuning-impact-gradcam} (b), one can see that the model used a background region to make the decision. Note that in both cases the scores given to the involved classes are small (considering a large score to be greater than 0.7) and, in the second image, the fine-tuning decreased the score by almost 50\%.
37
+
38
+ Two other interesting cases were identified during the heatmap analysis. There are cases, e.g. Fig. \ref{fig:res-finetuning-impact-gradcam2} (a), where the model increases the pixels' contribution and the classes' scores when fine-tuning only the last blocks; however, the pixels' contribution decreases when blocks closer to the middle are also trained.
39
+
40
+ \begin{figure}[htb!]
41
+ \centering
42
+ \includegraphics[width=1\textwidth]{Cap2/res-finetuning-impact-gradcam2.png}
43
+ \caption{Examples of heatmaps and scores generated by CEXP1, CEXP2, CEXP3 and CEXP4 for the same samples.}
44
+ \label{fig:res-finetuning-impact-gradcam2}
45
+ \end{figure}
46
+
47
+ Note that in all the examples of Fig. \ref{fig:res-finetuning-impact-gradcam2}, although CEXP3 increases the pixels' contribution, this does not imply an improvement in the prediction. In Fig. \ref{fig:res-finetuning-impact-gradcam2} (d), despite some pixels contributing to the prediction, the score of the class decreased compared with the results for the same sample in the other experiments, which implies that the model is more indecisive. In some cases, this can lead the model to errors.
48
+
49
+ In CEXP3's third sample, shown in Fig. \ref{fig:res-finetuning-impact-gradcam2} (c), there are many background pixels contributing to the decision. Comparing the result with CEXP1 for the same sample, it is clear that some background pixels contributed more than leaf pixels. This situation directly affects the confidence in the model, leading users to trust the results less.
50
+
51
+ So, although more pixels contribute to the classification, training the blocks closer to the middle of the model does not imply an improvement in its predictions for grapevine leaf classification.
52
+
53
+ One can observe that there are samples with few changes in the contributing region when comparing the model acting as a feature extractor (CEXP1) and the fully fine-tuned model (CEXP2), e.g. Fig. \ref{fig:res-finetuning-impact-gradcam2} (c) and (d).
54
+
55
+ In general terms, the regions that contributed the most (redder in the heatmaps) are similar in all samples. The model acting as a feature extractor and the fully fine-tuned model obtain similar results in terms of metrics and pixel contribution. Fine-tuning from the middle blocks leads the model to look at more features in the image; however, it does not necessarily increase the model's performance or make it more reliable in this application.
56
+
57
+ \input{Cap2/classification}
58
+
59
+
60
+ %-----------------------------------------------------------------------------------------------------------------
Dissertação/Cap2/classification.tex ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Different Hyper-Parameters in the Grapevine Species Classification}\label{sec:res_classification}
2
+
3
+ The results of the classification experiments CEXP5, CEXP6, CEXP7, CEXP8, CEXP9 and CEXP10 are summarized in Table \ref{tab:classification_results}. All the models achieved accuracy and F1-Score between 0.90 and 0.94. CEXP5 will be the baseline model throughout this discussion, in order to compare the use of the Focal Loss, of rough segmentation as pre-processing, and of different architectures in the classification. The confusion matrix for CEXP5 can be seen in Fig. \ref{fig:res-clas-case1} (a).
4
+
5
+
6
+ \begin{table}[htp!]
7
+ \begin{tabular}{@{}lll@{}}
8
+ \toprule
9
+ Alias & Accuracy & F1 Score \\ \midrule
10
+ CEXP5 (baseline) & \textbf{0.94} & \textbf{0.94} \\
11
+ CEXP6 & 0.90 & 0.90 \\
12
+ CEXP7 & 0.93 & 0.92 \\
13
+ CEXP8 & 0.93 & 0.93 \\
14
+ CEXP9 & \textbf{0.94} & \textbf{0.94} \\
15
+ CEXP10 & \textbf{0.94} & \textbf{0.94} \\ \bottomrule
16
+ \end{tabular}\caption{Results of the classification experiments conducted in this study.}\label{tab:classification_results}
17
+ \end{table}
18
+
19
+
20
+ \begin{figure}[htb!]
21
+ \centering
22
+ \includegraphics[width=1\textwidth]{Cap2/classification/cm-5-6.png}
23
+ \caption{Comparison between the CEXP5 and CEXP6's confusion matrices.}
24
+ \label{fig:res-clas-case1}
25
+ \end{figure}
26
+
27
+ The XAI reports using LIME, Grad-CAM and Grad-CAM++ showed that leaf parts are the most considered in the decisions made by CEXP5 (see Fig. \ref{fig:xai-baseline}), with regions around the leaf's center being highlighted. Two situations stood out when looking at such XAI reports.
28
+
29
+ The first is when the stem contributes strongly to the model's decision, which can represent a bias in the dataset. As one can see in Fig. \ref{fig:xai-baseline} (b), for LIME, Grad-CAM and Grad-CAM++ the stem was in a region that contributed positively to the ground-truth class.
30
+
31
+ In the second, in images of the back of leaves more background pixels are used for decisions than in images of the front of the leaf. This happens because of the small quantity of back-of-leaf images in the dataset, since only the CDS12 contains this type of sample. Thus, the models are expected to be confused when classifying images of the back of leaves, and the XAI approaches confirmed this expectation, as one can see in Fig. \ref{fig:xai-baseline} (b) and (c).
32
+
33
+ \begin{figure}[htb!]
34
+ \centering
35
+ \includegraphics[width=1\textwidth]{Cap2/classification/xai-baseline.png}
36
+ \caption{Comparison between different XAI approaches for four different samples. The explanations were obtained for the ground-truth class: a) MG, b) RG, c) RG and d) RG.}
37
+ \label{fig:xai-baseline}
38
+ \end{figure}
39
+
40
+ Comparing the results from the different methods, it was observed that in most of the cases there is an overlap between the positively contributing regions returned by LIME, Grad-CAM and Grad-CAM++. However, in a few cases the hottest region in the heatmaps returned by Grad-CAM does not appear as a contributing superpixel in LIME's result.
41
+
42
+ In general terms, Grad-CAM++ returns larger contributing regions than Grad-CAM. In a few cases the hottest region in Grad-CAM++ differs from the hottest region in Grad-CAM (see the hottest parts, colored red in the heatmaps, in Fig. \ref{fig:xai-baseline} (b)).
43
+
44
+ In cases where more than one leaf appears in the image, Grad-CAM++ tends to return heatmaps that cover more than one leaf. This can be a benefit of Grad-CAM++; however, most of the cases in this application focus on one leaf. So, for this application, Grad-CAM can be more appropriate than Grad-CAM++, due to its lower sensitivity.
45
+
46
+ It is important to report that cases were found where LIME or Grad-CAM did not return any region that contributed positively (e.g. Fig. \ref{fig:xai-baseline} (d)).
47
+
48
+ \subsection{Impact of Using Focal Loss in the Classification of the Unbalanced Dataset}
49
+
50
+ As stated in Section \ref{sec:training-process}, the Focal Loss is a loss function employed when there is a large imbalance in the dataset used to train neural networks. Since the CDS5 is unbalanced, this technique was applied aiming to improve the performance of CEXP5. As shown in Table \ref{tab:classification_results}, the application of the Focal Loss with its default parameters decreased performance relative to CEXP5 in terms of metrics.
51
+
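+ For reference, a minimal sketch of a multi-class Focal Loss in TensorFlow is shown below. It follows the usual formulation $FL(p_t) = -\alpha(1-p_t)^{\gamma}\log(p_t)$ and is illustrative only, not necessarily the exact code used in CEXP6:
+ 
+ \begin{verbatim}
+ import tensorflow as tf
+ 
+ def categorical_focal_loss(gamma=2.0, alpha=0.25):
+     # Default parameters follow the original Focal Loss paper.
+     def loss(y_true, y_pred):
+         y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)  # avoid log(0)
+         cross_entropy = -y_true * tf.math.log(y_pred)
+         weight = alpha * tf.pow(1.0 - y_pred, gamma)
+         # Down-weighted cross entropy, one value per sample.
+         return tf.reduce_sum(weight * cross_entropy, axis=-1)
+     return loss
+ \end{verbatim}
+ 
+ Such a function can then be passed to \texttt{model.compile} like any other Keras loss.
+ 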
52
+ A comparison between the CEXP5 and CEXP6 CMs can be observed in Fig. \ref{fig:res-clas-case1}. All the classes in CEXP6 achieved a poorer performance when compared with CEXP5, except the RG class, whose true positives increased by one sample. In our opinion, this happened because the difference between the classes' sample counts is not enough to justify the use of the Focal Loss, so the Cross-Entropy without weighting achieved better performance in the metrics. The Focal Loss is mainly applied in cases where the difference between classes is bigger than 10x.
53
+
54
+ \begin{figure}[htp!]
55
+ \centering
56
+ \includegraphics[width=1\textwidth]{Cap2/classification/xai-focalloss.png}
57
+ \caption{Comparison between different XAI approaches for three different samples for CEXP5 and CEXP6. The explanations were obtained for the ground-truth class: a) CD, b) TC and c) TN. The results for each sample are marked in red for CEXP5 and in blue for CEXP6.}
58
+ \label{fig:xai-focalloss}
59
+ \end{figure}
60
+
61
+ Looking at the XAI explanations, compared with CEXP5, the use of the Focal Loss increased the pixels' contribution in the heatmaps generated by Grad-CAM and Grad-CAM++ (see Fig. \ref{fig:xai-focalloss} (a) and (b)), which means that the models returned bigger contributing regions. These regions are similar to the baseline model's; however, in a few cases there are changes in the hottest region (see Fig. \ref{fig:xai-focalloss} (b) and (c)). In most of the cases where Grad-CAM did not return heatmaps for the baseline model, it returned them in this experiment.
62
+
63
+
64
+ The increase of the contributing regions in Grad-CAM can be connected with the regularizing effect that the Focal Loss exerts on the network's weights during training. A recent study by \cite{Mukhoti2020} used the Focal Loss to handle the miscalibration of multi-class classification models, in the sense that the probability values they associate with the class labels they predict overestimate the likelihoods of those labels being correct in the real world \citep{Mukhoti2020}. The authors concluded that models trained with focal loss are better calibrated than those trained with cross-entropy loss.
65
+
66
+ One of the experiments conducted in \cite{Mukhoti2020}'s study was to calculate the $L_2$ norm of the weights of the models' last linear layer. We calculated the $L_2$ norm of the weights of the last fully connected layer before the output for CEXP5 and CEXP6 and obtained, respectively, 22.94 and 22.77. Since CEXP6 obtained a smaller $L_2$ norm than CEXP5, this corroborates our hypothesis that the Focal Loss calibrated the model during training.
67
+
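+ A minimal sketch of this check in Keras (the model path is hypothetical, and we assume the classification head is the model's last \texttt{Dense} layer):
+ 
+ \begin{verbatim}
+ import numpy as np
+ from tensorflow import keras
+ 
+ model = keras.models.load_model("cexp5.h5")  # hypothetical path
+ # Last fully connected layer before the model's output.
+ dense = [l for l in model.layers
+          if isinstance(l, keras.layers.Dense)][-1]
+ kernel = dense.get_weights()[0]
+ print(np.linalg.norm(kernel))  # L2 norm of the weight matrix
+ \end{verbatim}
+ 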
68
+ Despite a few cases with notable differences between the LIME results of CEXP6 and CEXP5, e.g. Fig. \ref{fig:xai-focalloss} (a), in most of the cases the results are very similar to CEXP5's, e.g. Fig. \ref{fig:xai-focalloss} (b) and (c).
69
+
70
+ \subsection{Segmentation as a Pre-processing Tool}
71
+
72
+ As one can see in Table \ref{tab:classification_results}, the model trained with the segmented CDS5 does not achieve the best performance in the overall metrics, obtaining a worse result than CEXP5. A comparison between its CM and CEXP5's CM can be observed in Fig. \ref{fig:res-clas-case2}.
73
+
74
+ \begin{figure}[t]
75
+ \centering
76
+ \includegraphics[width=1\textwidth]{Cap2/classification/cm-5-7.png}
77
+ \caption{Comparison between the CEXP5 and CEXP7's confusion matrices.}
78
+ \label{fig:res-clas-case2}
79
+ \end{figure}
80
+
81
+ In general terms, both models achieved a good performance (considering an F1 score greater than 0.9 as good). CEXP7 achieved better performance in the MG and RG classes, worse performance in the CD, TR, and TC classes, and kept the performance for the TN class.
82
+
83
+ Looking at CEXP7's XAI reports (see Fig. \ref{fig:xai-segmentation}), it was observed that fewer background pixels were used by the model for the decision, compared with CEXP5.
84
+
85
+ \begin{figure}[p]
86
+ \centering
87
+ \includegraphics[width=0.9\textwidth]{Cap2/classification/xai-segmentation.png}
88
+ \caption{Comparison between different XAI approaches for three different samples for CEXP5 and CEXP7. The explanations were obtained for the ground-truth class: a) CD, b) CD and c) MG. The results for each sample are marked in red for CEXP5 and in gray for CEXP7. Since the segmentation was done by hiding the background regions with black, for a better view the pixels that did not contribute positively to the ground-truth class were colored white in CEXP7's generated heatmaps.}
89
+ \label{fig:xai-segmentation}
90
+ \end{figure}
91
+
92
+ In this experiment, LIME returned contributing regions more different from those of CEXP5 and CEXP6 (e.g. Fig. \ref{fig:xai-segmentation} (a)). In cases where a lot of background was removed from the image, it returned less background contribution in the superpixels (e.g. Fig. \ref{fig:xai-segmentation} (c)).
93
+
94
+ The size of the contributing regions obtained with Grad-CAM increased compared with CEXP5. As in CEXP6, in some cases where Grad-CAM did not return heatmaps for CEXP5, it did for CEXP7 (see e.g. Fig. \ref{fig:xai-segmentation} (b)). It was also observed that the heatmaps returned by Grad-CAM and Grad-CAM++ were more leaf-centered (see e.g. Fig. \ref{fig:xai-segmentation} (c)).
95
+
96
+
97
+ It is important to highlight that the segmentation applied to the dataset is not accurate. Furthermore, it was applied after the dataset had been augmented. Although the model did not achieve the best result in the overall metrics, it is clear from the XAI reports that more reliable features are considered for decisions, compared with CEXP5. Thus, improving the segmentation accuracy, together with applying the segmentation to the dataset before the data augmentation, can improve both the metrics and the human trust in this application.
98
+
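+ For illustration, the rough segmentation can be applied to an RGB image with a simple element-wise product (a sketch assuming a single-channel mask with values in $[0,1]$):
+ 
+ \begin{verbatim}
+ import numpy as np
+ 
+ def apply_mask(image, mask, threshold=0.5):
+     # Pixels whose mask value is below the threshold are
+     # treated as background and set to black.
+     binary = (mask > threshold).astype(image.dtype)
+     return image * binary[..., np.newaxis]  # broadcast over RGB
+ \end{verbatim}
+ 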
99
+ \subsection{Different Architectures}
100
+
101
+ In CEXP8, CEXP9 and CEXP10 our main objective was to verify the impact of using other DL architectures in the classification of grapevine species. As one can see in Table \ref{tab:classification_results}, Xception, ResNet-101 and EfficientNetB3 achieved the same performance, outperforming the remaining experiments. A comparison between the CEXP5, CEXP8, CEXP9 and CEXP10 CMs can be seen in Fig. \ref{fig:res-clas-case3}.
102
+
103
+ \begin{figure}[htp!]
104
+ \centering
105
+ \includegraphics[width=1\textwidth]{Cap2/classification/cm-5-7-8-9-10.png}
106
+ \caption{Comparison between the CEXP5, CEXP8, CEXP9 and CEXP10's confusion matrices.}
107
+ \label{fig:res-clas-case3}
108
+ \end{figure}
109
+
110
+ In fact, models that use the skip connection introduced in the ResNet architecture achieved the best performance in terms of metrics, as in several other DL classification tasks. The CMs for each model show that the difference between the four architectures is small. The worst performance was achieved by the MobileNetV2 architecture. However, it is important to highlight that EfficientNetB3 represents the best result in terms of metrics and computation: the model achieved 0.94 accuracy and F1-Score using approximately 12 million parameters. For comparison purposes, Xception has almost 23 million and ResNet-101 almost 44 million. Considering that on ImageNet EfficientNetB3 outperformed all the architectures explored in this study, a performance equal to or superior to the others was expected.
111
+
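+ These parameter counts can be reproduced directly from the Keras reference implementations (a quick sketch; exact counts vary slightly with the attached classification head):
+ 
+ \begin{verbatim}
+ from tensorflow import keras
+ 
+ for net in (keras.applications.EfficientNetB3,
+             keras.applications.Xception,
+             keras.applications.ResNet101):
+     model = net(weights=None)  # architecture only, no weight download
+     print(model.name, f"{model.count_params():,}")
+ \end{verbatim}
+ 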
112
+ Looking at the XAI reports, the different architectures generated different contributing regions in both LIME and Grad-CAM/Grad-CAM++.
113
+
114
+ \begin{figure}[htp!]
115
+ \centering
116
+ \includegraphics[width=0.95\textwidth]{Cap2/classification/xai-architectures.png}
117
+ \caption{Comparison between different XAI approaches for three different samples (the same as the first three in Fig. \ref{fig:xai-baseline}) for CEXP8, CEXP9 and CEXP10. The explanations were obtained for the ground-truth class: a) MG, b) MG and c) RG. The results for each sample are marked in purple for CEXP8, in pink for CEXP9 and in brown for CEXP10.}
118
+ \label{fig:xai-architectures}
119
+ \end{figure}
120
+
121
+ In CEXP8, the MobileNetV2 experiment, the regions generated by LIME are, in general terms, similar to those generated by CEXP5, while the heatmaps generated by Grad-CAM are generally small. It was observed that there is an increase in background pixels in the contributing regions.
122
+
123
+ In CEXP9, the ResNet-101 experiment, LIME also generated results similar to CEXP5's. It was observed that the returned superpixels contain fewer background pixels. The heatmaps generated by the saliency map approaches were very different from the baseline model's.
124
+
125
+ There are many samples for which heatmaps are not returned by either Grad-CAM or Grad-CAM++ in CEXP9. The reason for this behavior is that the gradient of the referred class's score with respect to the last convolutional layer's activation maps in ResNet-101 is zero for all pixels. Despite the depth of the ResNet, the output of the model's convolutional part is composed of 2048 activation maps of 10x10 pixels, which is bigger than MobileNetV2's output, thus the size of the output was discarded as a cause. The applied threshold was also eliminated and the behavior continued. Then, explanations were attempted for other convolutional layers (the first from the last convolutional block and the last from the penultimate convolutional block) and the method still did not return heatmaps for several samples.
126
+
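+ One way to check this behavior per sample is sketched below (Keras; the layer name and pre-processing are assumptions): if the gradient tensor is zero everywhere, Grad-CAM cannot produce a heatmap for that sample.
+ 
+ \begin{verbatim}
+ import tensorflow as tf
+ from tensorflow import keras
+ 
+ def gradients_vanish(model, image, layer_name, class_index):
+     grad_model = keras.Model(
+         model.inputs,
+         [model.get_layer(layer_name).output, model.output])
+     with tf.GradientTape() as tape:
+         maps, preds = grad_model(image[tf.newaxis])
+         score = preds[:, class_index]
+     grads = tape.gradient(score, maps)  # dy_c / dA^k
+     return bool(tf.reduce_all(tf.equal(grads, 0.0)))
+ \end{verbatim}
+ 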
127
+ In CEXP10, Grad-CAM generated bigger contributing regions with a large overlap with the heatmaps generated for CEXP5; however, it was observed that the hottest regions changed. In this experiment all the samples obtained a heatmap from Grad-CAM, unlike CEXP5, where some did not. It was also noted that Grad-CAM++ is less sensitive here, generating small, more leaf-centered regions.
128
+
129
+
130
+
131
+ \section{Discussion}
132
+
133
+ \paragraph{Overall Metrics.} Observing the results in Table \ref{tab:classification_results}, one can see that Xception, ResNet-101 and EfficientNetB3 achieved the best overall performance, with 0.94 accuracy and F1 score. All of them are built upon the skip connection concept, first introduced in the ResNet architecture. In terms of computation and performance, the best model was EfficientNetB3.
134
+
135
+ In Table \ref{tab:overall_result} one can see the best results achieved by the proposed method and by the state-of-the-art in grapevine species identification. It is important to highlight that a direct comparison is not possible, since the involved datasets are not public. The best accuracy is achieved by \cite{ADAO2019}; however, this method used a controlled environment to acquire the images used in the identification. \cite{Skrabanek2021} achieved an accuracy of 98\%, but the method focuses on fruit images instead of leaf images. \cite{Pereira2019} is the most similar work, classifying six grape species using RGB images of leaves. The proposed method outperformed \cite{Pereira2019}. It is important to highlight that, among the considered similar works, the dataset used to train the models in this approach is the biggest in terms of quantity of images.
136
+
137
+ \begin{table}[htp]
138
+ \begin{tabular}{@{}llll@{}}
139
+ \toprule
140
+ Publication & Images & Varieties & Accuracy (\%) \\ \midrule
141
+ \cite{Skrabanek2021} & 7200 in-field & 7 & 98.00 \\
142
+ \cite{ADAO2019} & 312 controlled & 6 & \textbf{100.00} \\
143
+ \cite{Pereira2019} & 224 in-field & 6 & 77.30 \\
144
+ Our method & 10596 in-field & 6 & 94.00 \\ \bottomrule
145
+ \end{tabular}\caption{Comparison between the proposed method and the state-of-the-art in grapevine species identification using Deep Learning.}\label{tab:overall_result}
146
+ \end{table}
147
+
148
+ \paragraph{Fine-Tuning's Layer Configuration.} The choice of the layers to be trained in the fine-tuning directly affects the features the model looks at for decisions in this application. Using Grad-CAM to explain predictions, it could be concluded that the Xception model acting as a feature extractor and fully retrained achieved similar results in terms of metrics and contributing regions, and that fine-tuning only the last two blocks led to an increase in the contributing regions; however, this does not necessarily increase the model's performance, since several background pixels start to compose such regions. Then, on account of the small improvements in the contributing regions, which can make the model more general, applying the fine-tuning to all layers of the Xception improves the identification of grapevine species.
149
+
150
+
151
+ \paragraph{Focal Loss.} In CEXP6 it was observed that using the Focal Loss instead of the Cross-Entropy deteriorated the Xception's performance. However, the XAI explanations showed that CEXP6 used less background information for decisions. This was investigated, and the behavior can be connected with the regularization exerted by the Focal Loss during training. To confirm this effect, more tests are needed.
152
+
153
+ \paragraph{Segmentation.} The same behavior was observed in CEXP7. Although the Xception model trained with the roughly segmented dataset achieved a worse performance compared with the Xception trained without the rough segmentation, the XAI approaches showed that the model looks into more reliable regions when making decisions. Thus, a more accurate segmentation can improve performance and make the model more reliable.
154
+
155
+ Other techniques can be used to decrease the models' usage of background pixels for decisions. The concept of attention can be applied to ConvNets by employing the Convolutional Block Attention Module proposed by \cite{Woo2018}. This technique can be coupled to different architectures. \cite{Woo2018} showed in their work, using Grad-CAM, that this module leads the models to look at more important features in general classification tasks.
156
+
157
+ Explanatory active learning can also be explored in order to improve the human trust in the model. In this technique, a learner can query the user (or some other information source) to obtain desired outputs for the data \citep{Schramowski2020}. The learner should provide the user with an explanation of its decision. Thus, for our application, the learner is a model being trained, the user can be an ampelographer or a segmented dataset, and the explanations can be provided by LIME or Grad-CAM. More details about this technique can be found in the work of \cite{Schramowski2020}.
158
+
159
+ \paragraph{Different Architectures.} The XAI approaches confirmed that, in all the experiments, the model is mostly looking at the leaf regions when making decisions. The differences between the experiments allowed a comparison between LIME, Grad-CAM and Grad-CAM++.
160
+
161
+ The class-activation-based approaches, Grad-CAM and Grad-CAM++, are strongly dependent on the layer chosen to extract the gradient. This dependency can make the methods inappropriate for comparing the contributing regions between different architectures, since the convolutional layers can be strongly different from each other. Another weakness of this dependency is the possibility that there is no variation of the scores at the output of a multi-class classification model with respect to a specific layer in the model, the case reported for the CEXP9 experiment. In this case, as stated in this work, the method cannot generate heatmaps, since the derivative of the model's output scores with respect to the convolutional layer's activations is zero.
162
+
163
+ The absence of heatmaps for several samples in CEXP9 needs to be carefully investigated. To the best of our knowledge, no work in the literature has reported or investigated this behavior of Grad-CAM when used to obtain explanations for a fine-tuned ResNet-101 model.
164
+
165
+ LIME can be a more reliable approach to compare different architectures. It sees the model being explained as a black box, using surrogate models to approximate the model's behavior in a specific sample's neighborhood. So, the explanations are expected to be more similar between different architectures trained on a specific dataset. However, it is important to ensure the stability of the superpixel generation, so that the results are reproducible.
166
+
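+ A minimal usage sketch with the \texttt{lime} package is shown below; fixing \texttt{random\_seed} helps stabilize the superpixel generation (the parameter values are illustrative):
+ 
+ \begin{verbatim}
+ from lime import lime_image
+ 
+ explainer = lime_image.LimeImageExplainer()
+ explanation = explainer.explain_instance(
+     image,              # H x W x 3 numpy array
+     model.predict,      # maps a batch of images to class scores
+     top_labels=1, num_samples=1000, random_seed=42)
+ img, mask = explanation.get_image_and_mask(
+     explanation.top_labels[0], positive_only=True)
+ \end{verbatim}
+ 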
167
+ Grad-CAM is a good approach to explain the same architecture with different training hyper-parameters, as could be observed in the comparison between CEXP5 and CEXP6.
168
+
169
+ Compared to Grad-CAM, we observed that Grad-CAM++ is more sensitive for this application. This means that bigger regions are returned when it is applied to the same model as Grad-CAM. The use of partial derivatives to obtain the importance of the maps, instead of Global Average Pooling, also implies changes in the hottest regions.
170
+
171
+ This behavior is expected, since \cite{gradcamplus} reported that the heatmap localization obtained with Grad-CAM is not precise enough. However, since most of the cases are leaf-centered and there is no huge difference between the classified classes, as all the samples are composed only of leaves, Grad-CAM appears more appropriate than Grad-CAM++ for explaining decisions in this application.
172
+
173
+ It is important to highlight that Grad-CAM++ returned smaller contributing regions than Grad-CAM in several samples in CEXP10 and did not work in CEXP9, failing to return heatmaps for several samples. In terms of architecture construction, the only difference between EfficientNetB3 and the remaining architectures is the use of the squeeze-and-excitation optimization. The fact that Grad-CAM returned heatmaps successfully for all the samples in CEXP10 can be related to this optimization. This technique increases the channels' interdependence, giving the model an adaptive way to define their individual relevance. Thus, this can be reflected in the overall network, causing an increase in the variation of the output's scores with respect to the last convolutional layer in the architecture. One way to verify this claim is to fine-tune a MobileNetV3 \citep{Howard2019}, which uses the same technique, and verify the heatmaps generated by Grad-CAM.
174
+
175
+
176
+
177
+
178
+
179
+
180
+
Disserta/303/247/303/243o/Cap2/classification/cm-5-6.png ADDED
Disserta/303/247/303/243o/Cap2/classification/cm-5-7-8-9-10.png ADDED
Disserta/303/247/303/243o/Cap2/classification/cm-5-7.png ADDED
Dissertação/Cap2/classification/xai-architectures.png ADDED

Git LFS Details

  • SHA256: b2f5199941579f3a9fdfcd07dd79e0c07628836d9dbf0e7a0c0d4ac92d2fbce8
  • Pointer size: 132 Bytes
  • Size of remote file: 4.02 MB
Dissertação/Cap2/classification/xai-baseline.png ADDED

Git LFS Details

  • SHA256: ea1fbe283e2bb96e2543ca295c9acec9c077a5d5520f2e9da2ce145a008fd424
  • Pointer size: 132 Bytes
  • Size of remote file: 2.46 MB
Dissertação/Cap2/classification/xai-focalloss.png ADDED

Git LFS Details

  • SHA256: 1966b89220693fb1a2129fe291615b7c51b967b6549d98bdcf7a58196c231b30
  • Pointer size: 132 Bytes
  • Size of remote file: 3.18 MB
Dissertação/Cap2/classification/xai-segmentation.png ADDED

Git LFS Details

  • SHA256: 8469a832ac434529f1924c2304acf60715e6e413a3300a7ece5ee04d8c2fe27f
  • Pointer size: 132 Bytes
  • Size of remote file: 3.52 MB
Dissertação/Cap2/grad_cam.tex ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Gradient-weighted Class Activation Mapping}
2
+
3
+ Gradient-weighted Class Activation Mapping (Grad-CAM) is a technique proposed by \citet{DBLP:journals/corr/SelvarajuDVCPB16} that aims to explain how a model $M$ reached the conclusion that an image $I$ belongs to class $C$. This technique is a generalization of Class Activation Mapping \citep{zhou2015learning} and can be applied to several families of ConvNets without making any change to their architecture.
4
+
5
+ As a result, Grad-CAM produces a heatmap containing the regions of the image $I$ that contributed positively to its classification. This characteristic allows the explanations to be used to build trust in the classifications made by the model and to diagnose erroneous classifications, enabling the analysis of incorrect classifications, the identification of the effect of adversarial noise, and the identification of bias in the dataset used \citep{DBLP:journals/corr/SelvarajuDVCPB16}.
6
+
7
+ Given a model $M$, an image $I$, a convolutional layer $L$ and a class $C$, the method follows this approach \citep{DBLP:journals/corr/SelvarajuDVCPB16}:
8
+ \begin{enumerate}
9
+ \item Compute the gradient of the score of class $C$, $y_{c}$, with respect to the activation maps $A^{k}$ of the convolutional layer $L$. That is, we compute the variation of $y_{c}$ with respect to $A^{k}$, $\frac{\partial y_{c}}{\partial A^{k}}$, obtaining gradients for each of the activation maps of $L$.
10
+ \item Apply Global Average Pooling over the height and width dimensions, indexed by $i$ and $j$ respectively, obtaining the importance $\omega_{k}^{c}$ of the activation map $A^{k}$: $\omega_{k}^{c} = \frac{1}{Z}\sum\limits_{i}\sum\limits_{j}\frac{\partial y_{c}}{\partial A^{k}_{ij}}$, where $Z$ represents the number of pixels $i*j$ in the activation map $A^{k}$. Note that, since the activation maps belong to the same convolutional layer, they all have the same size.
11
+ \item Make a linear combination of the activation maps $A^{k}$ with the computed importances $\omega_{k}^{c}$ and apply the Rectified Linear Unit (ReLU) function to the result, since only the pixels that contributed positively to the classification are of interest for the heatmap. That is, the heatmap is $L_{Grad-CAM}^{c} = ReLU(\sum\limits_{k}\omega_{k}^{c}A^{k})$. Moreover, $L_{Grad-CAM}^{c}$ has the same size as the activation maps of layer $L$.
12
+
13
+ \end{enumerate}
14
+
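+ A minimal sketch of these three steps in Keras follows (illustrative only; the chosen layer name, input batching and pre-processing are assumptions, not the exact code used in this work):
+ 
+ \begin{verbatim}
+ import tensorflow as tf
+ from tensorflow import keras
+ 
+ def grad_cam(model, image, layer_name, class_index):
+     grad_model = keras.Model(
+         model.inputs,
+         [model.get_layer(layer_name).output, model.output])
+     with tf.GradientTape() as tape:
+         maps, preds = grad_model(image[tf.newaxis])
+         score = preds[:, class_index]                  # y_c
+     grads = tape.gradient(score, maps)                 # step 1
+     weights = tf.reduce_mean(grads, axis=(1, 2))       # step 2 (GAP)
+     cam = tf.einsum("bijk,bk->bij", maps, weights)     # step 3
+     return tf.nn.relu(cam)[0].numpy()                  # positive only
+ \end{verbatim}
+ 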
15
+ According to \cite{gradcamplus}, despite generating heatmaps with details about the predicted class that are useful for explaining deep models, Grad-CAM presents two problems: handling the localization of multiple occurrences of the same class in the image, and the heatmap localization not being precise enough with respect to covering the class region in the image. Thus, in that work the authors presented Grad-CAM++, a generalization of Grad-CAM.
16
+
17
+ Grad-CAM++ replaces the weights $\omega_{k}^{c}$ given to the activation maps, based on the fact that a positive gradient at a location $(i,j)$ of an activation map $A^k$ implies that increasing the intensity of the pixel $(i,j)$ would positively influence the class score $Y^c$ of class $C$. Thus, \cite{gradcamplus} state that a weighted combination of the positive partial derivatives with respect to each pixel of an activation map $A^k$ captures the importance of that map for class $C$. Therefore, using these derivatives instead of Global Average Pooling ensures that $\omega_{k}^{c}$ is a weighted average of the pixels' gradients. Hence, in Grad-CAM++, $\omega_{k}^{c} = \sum\limits_{i}\sum\limits_{j}\alpha_{ij}^{kc}\cdot relu(\frac{\partial Y^c}{\partial A_{ij}^k})$, where $\alpha_{ij}^{kc}$ are the weighting coefficients of the pixels' gradients for class $C$ and activation map $A^k$ \citep{gradcamplus}. Mathematically, $\alpha_{ij}^{kc} = \frac{\frac{\partial^2Y^c}{(\partial A^{k}_{ij})^2}}{2\frac{\partial^2Y^c}{(\partial A^{k}_{ij})^2} + \sum\limits_{a}\sum\limits_{b} A^{k}_{ab} \frac{\partial^3Y^c}{(\partial A^{k}_{ij})^3}}$, where $a,b$ are pixel coordinates of the activation map $A^k$, just like $i,j$.
Disserta/303/247/303/243o/Cap2/res-finetuning-impact-gradcam.png ADDED
Disserta/303/247/303/243o/Cap2/res-finetuning-impact-gradcam2.png ADDED
Disserta/303/247/303/243o/Cap2/res-finetuning-impact.png ADDED
Dissertação/Cap2/segmentation.tex ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The results of the segmentation experiments are summarized in Table \ref{tab:seg_results}.
2
+
3
+ \begin{table}[htb!]
4
+ \begin{tabular}{ll}
5
+ \hline
6
+ Alias & IoU Score \\ \hline
7
+ SEXP1 & 0.9401 \\
8
+ SEXP2 & \textbf{0.9408} \\
9
+ SEXP3 & 0.7670 \\
10
+ SEXP4 & 0.7670 \\ \hline
11
+ \end{tabular}\caption{Results of the segmentation experiments conducted in this study.}\label{tab:seg_results}
12
+ \end{table}
13
+
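+ For reference, the IoU score reported in Table \ref{tab:seg_results} can be computed per mask with a few lines of NumPy (a sketch assuming a binary ground-truth mask and a thresholded predicted mask):
+ 
+ \begin{verbatim}
+ import numpy as np
+ 
+ def iou_score(pred, target, threshold=0.5):
+     pred = pred > threshold          # binarize the prediction
+     target = target > 0.5
+     intersection = np.logical_and(pred, target).sum()
+     union = np.logical_or(pred, target).sum()
+     return intersection / union if union else 1.0
+ \end{verbatim}
+ 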
14
+ The best result was achieved using the U-Net model with EfficientNetB3 as backbone. However, the difference from the U-Net plus ResNet-50 (SEXP1) is not relevant. EfficientNetB3's better performance was expected, since it uses skip connections, like ResNet-50, and achieved better performance on ImageNet than ResNeXt-101 \citep{Xie2017}, a ResNet-101 update.
15
+
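+ Such a model can be assembled, for instance, with the qubvel \texttt{segmentation\_models} package (an assumption about tooling, shown only to make the encoder/decoder pairing concrete):
+ 
+ \begin{verbatim}
+ import segmentation_models as sm
+ 
+ # U-Net decoder over an ImageNet-pretrained EfficientNetB3 encoder,
+ # one sigmoid output channel for the leaf/background mask.
+ model = sm.Unet("efficientnetb3", encoder_weights="imagenet",
+                 classes=1, activation="sigmoid")
+ model.compile("Adam", loss=sm.losses.bce_jaccard_loss,
+               metrics=[sm.metrics.iou_score])
+ \end{verbatim}
+ 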
16
+ The SegNet experiments achieved a poor performance when compared with the U-Net experiments. In fact, SegNet was designed to be fast while keeping a good performance, and the combination of max unpooling plus convolutions to fill the sparse matrices did not work well in this application. The change of SegNet's encoder depth did not result in any significant change: both experiments achieved the same IoU score.
17
+
18
+ It is important to analyze the obtained results qualitatively, since the segmentation will be applied as a pre-processing tool. Thus, an analysis with the XDS was conducted, aiming to verify how the experiments behave on a dataset with samples from different acquisition devices. In general terms, SEXP2 achieved the best results, as it did in the metrics.
19
+
20
+ Some cases must be analyzed. In images with a large presence of leaf regions and small background regions, none of the models achieved an acceptable performance. Fig. \ref{fig:res-seg-case1} brings two example samples. SEXP4 still manages to remove some pieces of background, as one can see in Fig. \ref{fig:res-seg-case1} d) in the second row. According to \cite{DBLP:journals/corr/BadrinarayananK15}, the max unpooling makes the architecture segment small objects nicely, which can explain the fact that only it removed this small background part.
21
+
22
+ \begin{figure}[htb!]
23
+ \centering
24
+ \includegraphics[width=1\textwidth]{Cap2/segmentation/segmentation-case1.jpg}
25
+ \caption{Segmentation results for a) SEXP1, b) SEXP2, c) SEXP3 and d) SEXP4. The results show that in situations with a high presence of leaves and a low presence of background, the performance of the models is poor.}
26
+ \label{fig:res-seg-case1}
27
+ \end{figure}
28
+
29
+ The ground and sky are easily segmented from leaf regions by all the models when they are in big, well-separated regions, e.g. Fig. \ref{fig:res-seg-case2}. However, if they are in small, spread-out regions, the models cannot segment them well.
30
+
31
+ \begin{figure}[htb!]
32
+ \centering
33
+ \includegraphics[width=1\textwidth]{Cap2/segmentation/segmentation-case2.jpg}
34
+ \caption{Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 3 samples (a, b and c). The results show that the models perform well if background regions are big and well separated (a and b); however, in cases where the background regions are spread out, the performance can be poor.}
35
+ \label{fig:res-seg-case2}
36
+ \end{figure}
37
+
38
+ SEXP3 tends to generate smoother regions in the segmentation when compared with SEXP4 (see Fig. \ref{fig:res-seg-case2} (a) and (b)). The effect of using more high-level features in the encoder and more upsampling levels in the decoder drives the model to this result. In some cases, SEXP4 may misclassify leaf regions as background regions due to the less smooth generated regions. The loss of leaf information can lead the classification models to errors, which is a big problem, since the purpose of segmentation is to remove insignificant information before classification and leaves are the most relevant feature in this task.
39
+
40
+ The models' behavior on backgrounds generated by different acquisition devices deserves attention. All the models are able to segment well the smooth background generated by the Canon EOS 600D, even though the models were trained only on examples acquired with smartphones. Backgrounds with a color very similar to the leaf region are better segmented by SEXP4, e.g. Fig. \ref{fig:res-seg-case3} (a) and (b).
41
+
42
+
43
+ \begin{figure}[htb!]
44
+ \centering
45
+ \includegraphics[width=1\textwidth]{Cap2/segmentation/segmentation-case3.jpg}
46
+ \caption{Segmentation results from SEXP1, SEXP2, SEXP3 and SEXP4 for 5 samples (a, b, c, d and e). Smooth backgrounds with colors similar to the leaf region (a, b, and c) are poorly segmented by the models. However, smooth backgrounds with different colors and textures (ground and sky) are well segmented.}
47
+ \label{fig:res-seg-case3}
48
+ \end{figure}
49
+
50
+ The models do not segment well the smooth backgrounds generated by the smartphones (see Fig. \ref{fig:res-seg-case3} (c)) when their color is very similar to the leaf color. However, when the smooth background regions are ground or sky, the U-Net can separate them better than the SegNet, e.g. Fig. \ref{fig:res-seg-case3} (d) and (e).
51
+
52
+ Although images of the back of leaves are not in the XDS, the segmentation models behave well in this situation. Moreover, in the few cases with hands in the images, the hands are not considered background, while fruits are considered background by the U-Net models.
53
+
54
+ The overall results show that SEXP2 is the best model to be applied as pre-processing in the classification experiments, due to its better IoU score and the reduced presence of false positives in the background class. It is important to highlight that the XDS was roughly annotated, thus the results are compatible with the dataset used in the training. An accurate annotation with data augmentation techniques can produce more accurate results; besides that, it can be employed in the automatic computation of Elliptic Fourier descriptors, e.g. \cite{Neto2006}, or in Procrustes analysis, which can enrich the available data used by DL models for grapevine species identification.
55
+
56
+
57
+
58
+
Disserta/303/247/303/243o/Cap2/segmentation/segmentation-case1.jpg ADDED
Disserta/303/247/303/243o/Cap2/segmentation/segmentation-case2.jpg ADDED
Disserta/303/247/303/243o/Cap2/segmentation/segmentation-case3.jpg ADDED
Dissertação/Cap3/CAP3.aux ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \relax
2
+ \providecommand\hyper@newdestlabel[2]{}
3
+ \@writefile{toc}{\contentsline {chapter}{\numberline {5}Methods}{47}{chapter.5}\protected@file@percent }
4
+ \@writefile{lof}{\addvspace {10\p@ }}
5
+ \@writefile{lot}{\addvspace {10\p@ }}
6
+ \newlabel{Ch:Methods}{{5}{47}{Methods}{chapter.5}{}}
7
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.1}{\ignorespaces General flowchart of the methods used in this work.}}{47}{figure.5.1}\protected@file@percent }
8
+ \newlabel{fig:general_flowchat}{{5.1}{47}{General flowchart of the methods used in this work}{figure.5.1}{}}
9
+ \@writefile{toc}{\contentsline {section}{\numberline {5.1}Datasets}{48}{section.5.1}\protected@file@percent }
10
+ \newlabel{sec:met_datasets}{{5.1}{48}{Datasets}{section.5.1}{}}
11
+ \@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces Summary of datasets used during the work's development}}{49}{table.5.1}\protected@file@percent }
12
+ \newlabel{tab:dataset_sum}{{5.1}{49}{Summary of datasets used during the work's development}{table.5.1}{}}
13
+ \citation{sensarea}
14
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.2}{\ignorespaces Images acquired for the same grapevine in different epochs. The month of acquisition is specified in each sample.}}{50}{figure.5.2}\protected@file@percent }
15
+ \newlabel{fig:cap3_comparacao_tempo}{{5.2}{50}{Images acquired for the same grapevine in different epochs. The month of acquisition is specified in each sample}{figure.5.2}{}}
16
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.3}{\ignorespaces Example of annotation obtained with Sensarea. It is important to highlight that the labeling was rough, this means that parts of sky or ground can be included in the region that represents the grapevine (orange mask in this example).}}{51}{figure.5.3}\protected@file@percent }
17
+ \newlabel{fig:cap3_mascara}{{5.3}{51}{Example of annotation obtained with Sensarea. It is important to highlight that the labeling was rough, this means that parts of sky or ground can be included in the region that represents the grapevine (orange mask in this example)}{figure.5.3}{}}
18
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.4}{\ignorespaces Distribution of images samples per grapevine variety in the video patches dataset for each subset.}}{52}{figure.5.4}\protected@file@percent }
19
+ \newlabel{fig:distribution-DS1}{{5.4}{52}{Distribution of images samples per grapevine variety in the video patches dataset for each subset}{figure.5.4}{}}
20
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.5}{\ignorespaces Examples of samples of the videos patches dataset for each class that composes it.}}{53}{figure.5.5}\protected@file@percent }
21
+ \newlabel{fig:video-patches-dataset}{{5.5}{53}{Examples of samples of the videos patches dataset for each class that composes it}{figure.5.5}{}}
22
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.6}{\ignorespaces Examples of samples of the segmentation dataset. The first line represents the Images and the second the Masks. In the masks, the "grapevine" class is represented with the white color, while the "background" class is represented with the black color.}}{54}{figure.5.6}\protected@file@percent }
23
+ \newlabel{fig:segmentation-dataset}{{5.6}{54}{Examples of samples of the segmentation dataset. The first line represents the Images and the second the Masks. In the masks, the "grapevine" class is represented with the white color, while the "background" class is represented with the black color}{figure.5.6}{}}
24
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.7}{\ignorespaces Distribution of images samples per grapevine variety in the 12 species dataset.}}{54}{figure.5.7}\protected@file@percent }
25
+ \newlabel{fig:distribution-DS12}{{5.7}{54}{Distribution of images samples per grapevine variety in the 12 species dataset}{figure.5.7}{}}
26
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.8}{\ignorespaces Images samples for each class in the 12 species dataset.}}{55}{figure.5.8}\protected@file@percent }
27
+ \newlabel{fig:DS12-samples}{{5.8}{55}{Images samples for each class in the 12 species dataset}{figure.5.8}{}}
28
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.9}{\ignorespaces Images samples for each class and acquisition device in the multidevice dataset.}}{56}{figure.5.9}\protected@file@percent }
29
+ \newlabel{fig:distribution-DS3}{{5.9}{56}{Images samples for each class and acquisition device in the multidevice dataset}{figure.5.9}{}}
30
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.10}{\ignorespaces Images samples for each class in the multidevice dataset.}}{57}{figure.5.10}\protected@file@percent }
31
+ \newlabel{fig:DS3-samples}{{5.10}{57}{Images samples for each class in the multidevice dataset}{figure.5.10}{}}
32
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.11}{\ignorespaces Images samples for each class in the merged dataset to (a) training, (b) validation and (b) test.}}{58}{figure.5.11}\protected@file@percent }
33
+ \newlabel{fig:DS5-samples}{{5.11}{58}{Images samples for each class in the merged dataset to (a) training, (b) validation and (b) test}{figure.5.11}{}}
34
+ \citation{Xiong2020}
35
+ \citation{Du2020}
36
+ \@writefile{toc}{\contentsline {section}{\numberline {5.2}Segmentation Experiments}{59}{section.5.2}\protected@file@percent }
37
+ \newlabel{sec:Segmentation Experiments}{{5.2}{59}{Segmentation Experiments}{section.5.2}{}}
38
+ \@writefile{lot}{\contentsline {table}{\numberline {5.2}{\ignorespaces Summary of the segmentation experiments conducted in this study.}}{60}{table.5.2}\protected@file@percent }
39
+ \newlabel{tab:segmentation_exp_sum}{{5.2}{60}{Summary of the segmentation experiments conducted in this study}{table.5.2}{}}
40
+ \newlabel{eq:loss-exp-segmentation}{{5.2}{61}{Segmentation Experiments}{table.5.2}{}}
41
+ \citation{Zhang}
42
+ \citation{chollet2017deep}
43
+ \citation{ADAO2019}
44
+ \newlabel{eq:iou}{{5.2}{62}{Segmentation Experiments}{equation.5.2}{}}
45
+ \@writefile{toc}{\contentsline {section}{\numberline {5.3}Classification Experiments}{62}{section.5.3}\protected@file@percent }
46
+ \newlabel{sec:met_classification}{{5.3}{62}{Classification Experiments}{section.5.3}{}}
47
+ \@writefile{lot}{\contentsline {table}{\numberline {5.3}{\ignorespaces Summary of the classification experiments conducted in this study.}}{63}{table.5.3}\protected@file@percent }
48
+ \newlabel{tab:classification_exp_sum}{{5.3}{63}{Summary of the classification experiments conducted in this study}{table.5.3}{}}
49
+ \citation{Zheng2016}
50
+ \newlabel{eq:step_decay}{{5.3}{64}{Classification Experiments}{equation.5.3}{}}
51
+ \newlabel{eq:DropRate}{{5.4}{64}{Classification Experiments}{equation.5.4}{}}
52
+ \citation{JasonBrownlee2014}
53
+ \@writefile{lot}{\contentsline {table}{\numberline {5.4}{\ignorespaces Trained blocks in the Xception model in the first four experiments conducted in this study.}}{65}{table.5.4}\protected@file@percent }
54
+ \newlabel{tab:fine-tuning-experiments}{{5.4}{65}{Trained blocks in the Xception model in the first four experiments conducted in this study}{table.5.4}{}}
55
+ \newlabel{eq:accuracy}{{5.5}{65}{Classification Experiments}{equation.5.5}{}}
56
+ \newlabel{eq:precision}{{5.6}{66}{Classification Experiments}{equation.5.6}{}}
57
+ \newlabel{eq:recall}{{5.7}{66}{Classification Experiments}{equation.5.7}{}}
58
+ \newlabel{eq:f1_score}{{5.8}{66}{Classification Experiments}{equation.5.8}{}}
59
+ \citation{Griffo2017}
60
+ \citation{Xiong2020}
61
+ \@writefile{lof}{\contentsline {figure}{\numberline {5.12}{\ignorespaces Images with background pixels set to black.}}{67}{figure.5.12}\protected@file@percent }
62
+ \newlabel{fig:segmented-images}{{5.12}{67}{Images with background pixels set to black}{figure.5.12}{}}
63
+ \citation{Woof2019}
64
+ \citation{Ribeiro2016}
65
+ \@setckpt{Cap3/CAP3}{
66
+ \setcounter{page}{70}
67
+ \setcounter{equation}{8}
68
+ \setcounter{enumi}{4}
69
+ \setcounter{enumii}{0}
70
+ \setcounter{enumiii}{0}
71
+ \setcounter{enumiv}{0}
72
+ \setcounter{footnote}{3}
73
+ \setcounter{mpfootnote}{0}
74
+ \setcounter{part}{0}
75
+ \setcounter{chapter}{5}
76
+ \setcounter{section}{3}
77
+ \setcounter{subsection}{0}
78
+ \setcounter{subsubsection}{0}
79
+ \setcounter{paragraph}{0}
80
+ \setcounter{subparagraph}{0}
81
+ \setcounter{figure}{12}
82
+ \setcounter{table}{4}
83
+ \setcounter{parentequation}{0}
84
+ \setcounter{NAT@ctr}{0}
85
+ \setcounter{subfigure}{0}
86
+ \setcounter{lofdepth}{1}
87
+ \setcounter{subtable}{0}
88
+ \setcounter{lotdepth}{1}
89
+ \setcounter{LT@tables}{1}
90
+ \setcounter{LT@chunks}{5}
91
+ \setcounter{tcbbreakpart}{1}
92
+ \setcounter{tcblayer}{0}
93
+ \setcounter{tcolorbox@number}{5}
94
+ \setcounter{tcbrastercolumn}{0}
95
+ \setcounter{tcbrasterrow}{0}
96
+ \setcounter{tcbrasternum}{0}
97
+ \setcounter{tcbraster}{0}
98
+ \setcounter{nlinenum}{0}
99
+ \setcounter{r@tfl@t}{0}
100
+ \setcounter{float@type}{16}
101
+ \setcounter{FBl@b}{0}
102
+ \setcounter{FRobj}{0}
103
+ \setcounter{FRsobj}{0}
104
+ \setcounter{FBcnt}{0}
105
+ \setcounter{ALG@line}{0}
106
+ \setcounter{ALG@rem}{0}
107
+ \setcounter{ALG@nested}{0}
108
+ \setcounter{ALG@Lnr}{2}
109
+ \setcounter{ALG@blocknr}{10}
110
+ \setcounter{ALG@storecount}{0}
111
+ \setcounter{ALG@tmpcounter}{0}
112
+ \setcounter{AM@survey}{0}
113
+ \setcounter{Item}{17}
114
+ \setcounter{Hfootnote}{7}
115
+ \setcounter{bookmark@seq@number}{30}
116
+ \setcounter{code}{0}
117
+ \setcounter{FBLTpage}{3}
118
+ \setcounter{section@level}{0}
119
+ }
Dissertação/Cap3/CAP3.tex ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %-------------------------------------------------------------------------
2
+
3
+
4
+ \chapter{Methods}
5
+ \label{Ch:Methods}
6
+ %------------------------------------------------------------------------------------------------------------
7
+ In this chapter, the methods used to develop the work are described. A flowchart of the methods can be seen in Fig. \ref{fig:general_flowchat}.
8
+
9
+ \begin{figure}[htb!]
10
+ \centering
11
+ \includegraphics[width=1\textwidth]{Cap3/fluxograma geral.jpg}
12
+ \caption{General flowchart of the methods used in this work.}
13
+ \label{fig:general_flowchat}
14
+ \end{figure}
15
+
16
+ In the first step, grapevine leaf images acquired in-field were annotated for classification and segmentation, resulting in raw datasets. As pre-processing, the images were cropped, resized and normalized, and a segmentation model was trained and applied to separate leaf regions from background regions, aiming to improve the grapevine species identification (a sketch of this step is given below). The pre-processed datasets were used to train different DL-based classification models, using different losses and the SGD optimizer. In the last step, the trained models were evaluated using metrics and XAI approaches. The XAI approaches allowed identifying whether the models use relevant features in the classification process, making it more transparent, which directly influences the human confidence in their predictions.
17
+
18
+ Section \ref{sec:met_datasets} details the image acquisition and pre-processing. The segmentation experiments are described in Section \ref{sec:Segmentation Experiments}, including the models used, the training process and the metrics used to evaluate them. Then, in Section \ref{sec:met_classification}, the classification experiments are explained in detail, from the choice of models to the approaches used to evaluate them.
19
+
20
+
21
+ \section{Datasets}\label{sec:met_datasets}
22
+
23
+ Four datasets of grapevine images acquired in a natural environment were created: three for classification and one for segmentation. Later, the three classification datasets were joined into a single dataset. Table \ref{tab:dataset_sum} summarizes the datasets used during the development of this work.
24
+
25
+ \afterpage{%
26
+ \clearpage% Flush earlier floats (otherwise order might not be correct)
27
+ \thispagestyle{empty}% empty page style (?)
28
+ \begin{landscape}% Landscape page
29
+ \input{Cap3/table-datasets}
30
+ \end{landscape}
31
+ \clearpage% Flush page
32
+ }
33
+
34
+ The first source of images was videos recorded in the research farm of the University of Trás-os-Montes and Alto Douro (Nossa Senhora de Lourdes farm, Vila Real, Portugal: 41°17’11.5”N, 7°44’14.1”W), between May and September of 2017, covering 6 different grapevine species (2 plants per variety). Two smartphones were used as acquisition instruments, resulting in videos with a spatial resolution of 1080 x 1920 pixels at 25 FPS.
35
+
36
+ The quantity of videos per class was irregular. For the Códega (CD), Moscatel Galego (MG), Rabigato (RG) and Tinto Cão (TC) classes, 40 videos were recorded, two videos per week in each month. For the Tinta Roriz (TR) class, 37 videos were recorded, since only one video per week was recorded in September, while for Touriga Nacional (TN) 39 videos were recorded, because in the last week of September only one video was recorded.
37
+
38
+ The videos were split into training, validation and test subsets before any processing. For most of the classes there were two videos per week; in these cases, the video with the longest duration was used in the training set, while the shorter one was assigned to validation or test, alternately. For example, for the class Touriga Nacional two videos were recorded on 4 May 2017, video $A$ with 7 seconds of duration and video $B$ with 10 seconds, and on 9 May 2017 two more were recorded, video $C$ with 10 seconds and video $D$ with 9 seconds. The training set in this case is composed of videos $B$ and $C$, the validation set of video $A$ and the test set of video $D$. For Touriga Nacional, the 3 videos recorded in September of 2017 were split equally between the 3 sets, while for Tinta Roriz the test set has one video fewer than the validation set.
39
+
40
+ With this distribution, a balanced sampling in terms of time was ensured, since the entire leaf season was included in all subsets. The importance of representing time in the classification of grapevine species is highlighted in Fig. \ref{fig:cap3_comparacao_tempo}, where a great difference can be seen between samples of the same vine collected in May, July and September.
41
+
42
+
43
+ \begin{figure}[htb!]
44
+ \centering
45
+ \includegraphics[width=0.8\textwidth]{Cap3/comparacao_tempo.jpg}
46
+ \caption{Images acquired from the same grapevine at different times. The month of acquisition is specified in each sample.}
47
+ \label{fig:cap3_comparacao_tempo}
48
+ \end{figure}
49
+
50
+ In the next step, using the software Sensarea \citep{sensarea}, each video was roughly labeled into grapevine regions and background regions. The idea was to separate the regions relevant for classification (e.g. leaves, stem, fruits) from the irrelevant ones (e.g. ground, sky). Sensarea's automatic tracking allowed a much shorter labeling time compared with fully manual labeling. However, due to some errors in the automatic tracking, the masks sometimes had to be adjusted manually. Some videos were not labeled because they were not centered on the vines. At the end of this process, a mask was obtained for each frame of each labeled video, as shown in Fig. \ref{fig:cap3_mascara}.
51
+
52
+ \begin{figure}[htb!]
53
+ \centering
54
+ \includegraphics[width=0.8\textwidth]{Cap3/mascara.jpg}
55
+ \caption{Example of annotation obtained with Sensarea. It is important to highlight that the labeling was rough, meaning that parts of the sky or ground may be included in the region that represents the grapevine (orange mask in this example).}
56
+ \label{fig:cap3_mascara}
57
+ \end{figure}
58
+
59
+ From the annotations it was possible to crop the frames at different sizes, aiming to obtain a classification dataset with different perspectives of the grapevines, robust to the zoom at which the frames were recorded. Two crop sizes were defined experimentally: 500x500 and 800x800 pixels, so each frame was cropped into patches of these sizes. Then, the library Shapely was used to choose which patches would be included in the dataset: Shapely allowed calculating the intersection area between each patch and the grapevine region of the frame, so that patches with low intersection (less than 95\%) were discarded. To avoid similar images in the dataset, the crops were made with a step of 30 frames.
60
+
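+
+ A minimal sketch of this filtering step, assuming the grapevine region of a frame is available as a Shapely polygon (the names \texttt{mask\_polygon}, \texttt{x}, \texttt{y} and \texttt{size} are hypothetical):
+
+ \begin{verbatim}
+ from shapely.geometry import box
+
+ def keep_patch(mask_polygon, x, y, size, min_overlap=0.95):
+     # build the square patch and measure how much of its area
+     # intersects the grapevine region of the frame
+     patch = box(x, y, x + size, y + size)
+     overlap = patch.intersection(mask_polygon).area / patch.area
+     return overlap >= min_overlap
+ \end{verbatim}
+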
61
+ The patches were cropped using a sliding window whose stride varied with the patch size: for 500x500 a stride of 250 pixels was chosen, while for 800x800 a stride of 160 pixels was chosen. The stride values were obtained experimentally. Considering that CNNs tend to lose spatial information at the borders over the layers, the overlap of patches can be a way to give more information to the trained models; besides, a similar approach is adopted when using data augmentation.
62
+
63
+ In the end, all selected patches were saved with a size of 300x300 pixels, composing a dataset with 17014 training images, 6853 validation images and 6576 test images, irregularly distributed between the 6 classes (see Fig. \ref{fig:distribution-DS1}). Bilinear interpolation was the method chosen to resize the images. The resulting dataset will be referred to as the \textbf{video patches dataset} (CDS1) in the remainder of the text, and some samples can be seen in Fig. \ref{fig:video-patches-dataset}.
64
+
65
+ \begin{figure}[htb!]
66
+ \centering
67
+ \includegraphics[width=0.9\textwidth]{Cap3/distribution-ds1.png}
68
+ \caption{Distribution of image samples per grapevine variety in the video patches dataset for each subset.}
69
+ \label{fig:distribution-DS1}
70
+ \end{figure}
71
+
72
+
73
+ \begin{figure}[htb!]
74
+ \centering
75
+ \includegraphics[width=1\textwidth]{Cap3/video patches dataset.jpg}
76
+ \caption{Examples of samples of the video patches dataset for each class that composes it.}
77
+ \label{fig:video-patches-dataset}
78
+ \end{figure}
79
+
80
+
81
+ A similar strategy was applied to generate a segmentation dataset, employing the same videos and the same approach to generating patches. The difference was in the selected patches: in this dataset, the chosen patches intersected the mask between 50\% and 85\%, to increase the background representativeness, and there was no overlap between patches during cropping. In the end, all selected patches were saved with a size of 512x512 pixels, composing a dataset with 5302 training images, 2567 validation images and 2727 test images. The resulting dataset will be referred to as the \textbf{segmentation dataset} (SDS) in the remainder of the text. Some samples of the segmentation dataset can be seen in Fig. \ref{fig:segmentation-dataset}.
82
+
83
+ \begin{figure}[htb!]
84
+ \centering
85
+ \includegraphics[width=1\textwidth]{Cap3/segmentation-example.jpg}
86
+ \caption{Examples of samples of the segmentation dataset. The first row shows the images and the second the masks. In the masks, the ``grapevine'' class is represented in white, while the ``background'' class is represented in black.}
87
+ \label{fig:segmentation-dataset}
88
+ \end{figure}
89
+
90
+ The second source was a collection of images of grapevine leaves from the 12 most representative species allowed in the DDR (2 plants per variety): Códega, Malvasia Fina (MF), Tinta Amarela (TA), Malvasia Preta (MP), Tinta Barroca (TB), Malvasia Rei (MR), Touriga Nacional, Tinto Cão, Moscatel Galego, Tinta Roriz, Rabigato and Mourisco Tinto (MT). The pictures were taken with a Canon EOS 600D camera, equipped with a 50mm \textit{f}/1.4 lens and an 18-megapixel resolution. The distribution of images per class was irregular and can be observed in Fig. \ref{fig:distribution-DS12}. The images were acquired in the research farm of the University of Trás-os-Montes and Alto Douro (Nossa Senhora de Lourdes farm, Vila Real, Portugal: 41°17’11.5”N, 7°44’14.1”W), between May and September of 2017. This dataset will be referred to as the \textbf{12 species dataset} (CDS2) in the remainder of the text.
91
+
92
+ \begin{figure}[htb!]
93
+ \centering
94
+ \includegraphics[width=0.7\textwidth]{Cap3/distribuition.png}
95
+ \caption{Distribution of images samples per grapevine variety in the 12 species dataset.}
96
+ \label{fig:distribution-DS12}
97
+ \end{figure}
98
+
99
+ The following guidelines were defined for the creation of this dataset: (1) each picture should contain only one grapevine variety; (2) the majority of the picture should be filled with leaves; and (3) an expert should be able to identify the grapevine exclusively by observation.
100
+
101
+ The images were cropped into squares and randomly redistributed into training, validation and test sets, with proportions of 70\%, 20\% and 10\%, respectively. Data augmentation was applied to the training set, generating 10 images for each original image through shifts, brightness variations and horizontal/vertical flips. The final dataset comprises 6718 training images, 132 validation images and 72 test images. Samples of the 12 species dataset can be observed in Fig. \ref{fig:DS12-samples}.
102
+
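+
+ A minimal sketch of such an augmentation setup in Keras; the exact shift and brightness ranges below are illustrative assumptions, since the text only names the transformation types:
+
+ \begin{verbatim}
+ from tensorflow.keras.preprocessing.image import ImageDataGenerator
+
+ # shifts, brightness variations and horizontal/vertical flips
+ augmenter = ImageDataGenerator(width_shift_range=0.1,
+                                height_shift_range=0.1,
+                                brightness_range=(0.8, 1.2),
+                                horizontal_flip=True,
+                                vertical_flip=True)
+ \end{verbatim}
+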
103
+ \begin{figure}[htb!]
104
+ \centering
105
+ \includegraphics[width=1\textwidth]{Cap3/example_ds12.jpg}
106
+ \caption{Image samples for each class in the 12 species dataset.}
107
+ \label{fig:DS12-samples}
108
+ \end{figure}
109
+
110
+ The third source was a collection of images of grapevine leaves from 6 classes: Códega, Moscatel Galego, Rabigato, Tinta Amarela, Tinta Roriz and Touriga Nacional. The images were acquired in the research farm of the University of Trás-os-Montes and Alto Douro (Nossa Senhora de Lourdes farm, Vila Real, Portugal: 41°17’11.5”N, 7°44’14.1”W) and in the Casa de Mateus Foundation's farm (Mateus, Vila Real, Portugal: 41°17'49"N, 7°42'44"W), using an Apple iPhone 11 (\textit{f}/1.8, 12 Megapixels, 26 millimeters), a Samsung Galaxy S7 (\textit{f}/1.7, 12 Megapixels, 26 millimeters) and a Samsung Galaxy Tab S6 Lite (\textit{f}/1.9, 8 Megapixels, 26 millimeters) on 27 July 2021. The distribution of images per class and acquisition device can be seen in Fig. \ref{fig:distribution-DS3}. In the remainder of the text this dataset will be referred to as the \textbf{multidevice dataset} (CDS3).
111
+
112
+ \begin{figure}[htb!]
113
+ \centering
114
+ \includegraphics[width=0.9\textwidth]{Cap3/distribution-ds3.png}
115
+ \caption{Distribution of image samples per class and acquisition device in the multidevice dataset.}
116
+ \label{fig:distribution-DS3}
117
+ \end{figure}
118
+
119
+
120
+
121
+ The images were acquired in square format, centered on a leaf. The idea was to capture images at different stages of growth, in order to improve the representativeness of time in the dataset. Samples of the multidevice dataset can be observed in Fig. \ref{fig:DS3-samples}.
122
+
123
+ \begin{figure}[htb!]
124
+ \centering
125
+ \includegraphics[width=1\textwidth]{Cap3/ds3-examples.jpg}
126
+ \caption{Image samples for each class in the multidevice dataset.}
127
+ \label{fig:DS3-samples}
128
+ \end{figure}
129
+
130
+
131
+ Seeking to keep only species that were present in CDS1, the class TA was removed from this collection for the experiments. Then, the images of the 5 remaining classes were randomly redistributed into training, validation and test sets, with proportions of 70\%, 20\% and 10\%, respectively. Data augmentation was applied to the training set, generating 6 images for each original image through shifts, brightness variations and horizontal/vertical flips. The final dataset comprises 12959 training images, 509 validation images and 286 test images.
132
+
133
+ In order to verify the ability of a DL model to learn from CDS1, an Xception model was fine-tuned using its training and validation subsets. To test the model, its test subset and the entire CDS2 were used. As a result, the model achieved an accuracy of 0.87 and an F1 score of 0.85 on CDS1's test set, while on CDS2 an accuracy of 0.56 and an F1 score of 0.55 were attained.
134
+
135
+ Our hypothesis for the weak result obtained by the model on CDS2 is that CDS1 is not sufficiently representative, because all the videos were recorded from only two plants per species. This is supported by the good result obtained on CDS1's test subset: as the same plants appear in the training, validation and test subsets, the model cannot generalize well.
136
+
137
+ Thus, it was decided to merge the three image sources, creating a dataset that is more robust in terms of acquisition devices and plant specimens. Nonetheless, to keep the dataset composed of leaf-centered images, 1224 training images, 510 validation images and 335 test images, irregularly distributed among the 6 classes, were manually chosen from CDS1. After that, data augmentation was applied to the training subset, generating 5 images for each original image through shifts, brightness variations and horizontal/vertical flips. The resulting dataset comprises 7344 training images, 510 validation images and 335 test images and will be referred to as the \textbf{simplified video patches dataset} (CDS4) in the remainder of the text.
138
+
139
+ In the end, the merged dataset was composed of the 6 classes of CDS1/CDS4, meaning that only images of these classes were copied from CDS2 and CDS3. The subsets of CDS4, CDS2 and CDS3 for the classes CD, MG, RG, TR, TC and TN were thus merged into a dataset with 27060 training images, 846 validation images and 521 test images. Aiming to reduce the difference between the samples per class in the test and validation subsets, the images from the CDS3 subsets were chosen randomly, which made it possible to keep a balanced quantity of samples in terms of acquisition device. The distribution of samples per class and subset can be observed in Fig. \ref{fig:DS5-samples}. As CDS3 does not have images for the TC class, there is a big difference in sample quantity between this class and the others. This dataset will be referred to as the \textbf{merged dataset} (CDS5) in the remainder of the text.
140
+
141
+ \begin{figure}[htb!]
142
+ \centering
143
+ \includegraphics[width=1\textwidth]{Cap3/cds5-image-distribution.png}
144
+ \caption{Distribution of image samples per class in the merged dataset for (a) training, (b) validation and (c) test.}
145
+ \label{fig:DS5-samples}
146
+ \end{figure}
147
+
148
+ As the test subset of CDS5 is composed of 521 images, almost 90 images per class, a new simplification was made, aiming to analyze the output of XAI techniques. The idea was to separate 20 test images per class from CDS5's test set, seeking to compare the results obtained with LIME, Grad-CAM and Grad-CAM++ applied to their classification. The images were chosen so that several situations were covered: images from different sources, leaves at different growth stages, different acquisition angles and different acquisition devices. As a result, a dataset with 120 images (20 for each class) was obtained. This dataset will be referred to as the \textbf{XAI dataset} (XDS) in the remainder of the text.
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+ \section{Segmentation Experiments}\label{sec:Segmentation Experiments}
157
+
158
+ To build systems capable of identifying grapevine species as precisely as ampelographers, images acquired in-field should be used to train them. This need increases the complexity of the problem, since this type of image contains a variety of information that is not related to the goal of grapevine species classification, which can lead DL models to error \citep{Xiong2020}.
159
+
160
+ In this situation, segmentation becomes an effective way to separate background regions from grapevine regions in images acquired in-field. In this section, the 4 segmentation experiments performed in this study are detailed. Their objective was to separate grapevine regions from background regions in order to improve grapevine species identification.
161
+
162
+ Table \ref{tab:segmentation_exp_sum} shows a summary of these experiments. Note that the experiments will be referred to by the aliases defined in Table \ref{tab:segmentation_exp_sum} in the remainder of the document. All the experiments were conducted using the SDS. Two architectures were chosen to be trained, U-Net and SegNet. The U-Net architecture was chosen due to the high accuracy it achieved in several works in different fields of research. The SegNet was chosen on account of its speed in inference and training, which allows its use in constrained devices, e.g. smartphones.
163
+
164
+ \afterpage{%
165
+ \clearpage% Flush earlier floats (otherwise order might not be correct)
166
+ \thispagestyle{empty}% empty page style (?)
167
+ \begin{landscape}% Landscape page
168
+ \input{Cap3/table-segmentation-experiments}
169
+ \end{landscape}
170
+ \clearpage% Flush page
171
+ }
172
+
173
+ In SEXP1, a U-Net architecture was trained using a ResNet-50 as backbone. The ResNet-50 was chosen due to its ability to extract high-quality features from images, being widely used in previous works aiming to segment leaves (see Section \ref{sec:lr-leabes-segmentation}). A combination of Focal Loss and Dice Loss was chosen for this experiment. The Dice Loss contributes with information about the loss globally and locally \citep{Du2020}, while the Focal Loss (with the default parameters $\alpha = 0.25$ and $\gamma = 2$) handles the unbalanced data (see Sec. \ref{sec:training-process}). The combination of losses used can be seen in Equation \ref{eq:loss-exp-segmentation}.
174
+
175
+ \begin{equation}\label{eq:loss-exp-segmentation}
176
+ L_{total} = L_{Dice} + L_{Focal}
177
+ \end{equation}
178
+
179
+ The model was trained in two steps. In the first step, the decoder was trained, keeping the backbone's weights frozen. In the second step, fine-tuning was applied: the backbone's weights were unfrozen and the entire model was trained. The Segmentation Models\footnote{See the documentation at https://segmentation-models.readthedocs.io/en/latest/index.html} framework was used to conduct this experiment.
180
+
181
+ In both steps the Adam optimizer was used and the model was trained for 50 epochs. In the first step the LR was set to 0.0001, while in the second it was set to 0.000001. Aiming to improve the training, an LR reducer was also applied in both steps: if the validation subset's IoU score did not improve in 2 epochs, the LR was multiplied by a factor of 0.95, meaning that its value was decreased by 5\%.
182
+
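+
+ A minimal sketch of this training setup with the Segmentation Models framework, showing only the first (frozen-backbone) step; \texttt{train\_data} and \texttt{val\_data} are hypothetical data generators:
+
+ \begin{verbatim}
+ import segmentation_models as sm
+ import tensorflow as tf
+
+ # U-Net with a ResNet-50 backbone, encoder frozen in the first step
+ model = sm.Unet('resnet50', input_shape=(320, 320, 3), classes=1,
+                 activation='sigmoid', encoder_weights='imagenet',
+                 encoder_freeze=True)
+
+ # combined Dice + Focal loss
+ total_loss = (sm.losses.DiceLoss()
+               + sm.losses.BinaryFocalLoss(alpha=0.25, gamma=2.0))
+ model.compile(tf.keras.optimizers.Adam(1e-4), loss=total_loss,
+               metrics=[sm.metrics.IOUScore()])
+
+ # multiply the LR by 0.95 if the validation IoU stalls for 2 epochs
+ reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
+     monitor='val_iou_score', factor=0.95, patience=2, mode='max')
+ model.fit(train_data, validation_data=val_data, epochs=50,
+           callbacks=[reduce_lr])
+ \end{verbatim}
+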
183
+ In SEXP2, all of SEXP1's configurations were kept, changing only the backbone to the EfficientNetB3. The objective of this experiment was to verify the impact of changing the backbone on the grapevine leaf segmentation. The EfficientNetB3 was chosen due to its better performance on ImageNet compared to the ResNet-50, while using fewer parameters.
184
+
185
+ In SEXP3 and SEXP4, the SegNet model was used to segment the images. The encoder level, a parameter that configures the number of backbone blocks used in the encoder-decoder architecture, was set to 3 in SEXP3 and to 4 in SEXP4. The remaining configurations are the same as in SEXP1.
186
+
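+
+ A minimal sketch of the SegNet instantiation, assuming the fork keeps the upstream \texttt{encoder\_level} argument (the exact signature may differ in the modified framework described below):
+
+ \begin{verbatim}
+ from keras_segmentation.models.segnet import segnet
+
+ # binary segmentation (grapevine vs. background) at 320x320;
+ # SEXP3 used encoder level 3 and SEXP4 used encoder level 4
+ model = segnet(n_classes=2, input_height=320, input_width=320,
+                encoder_level=3)
+ \end{verbatim}
+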
187
+ A modification of the Image Segmentation Keras\footnote{https://github.com/divamgupta/image-segmentation-keras} framework was used in these experiments, because Segmentation Models does not implement the SegNet architecture. The source code of the modifications is available on GitHub\footnote{Source code available at: https://github.com/gabri14el/image-segmentation-keras}.
188
+
189
+ There is a limitation in the Segmentation Models' implementation of the U-Net: only input sizes whose dimensions are multiples of 32 are allowed. Considering the objective of using the segmented images to improve the classification, and that the input size used in the classification was 300x300 pixels, the input of all segmentation models was set to 320x320 pixels. The images were resized using bilinear interpolation.
190
+
191
+ All the experiments were executed on a computer with an Intel Xeon E5-2680 CPU, 2 NVIDIA Quadro M4000 GPUs and 128 GB of RAM, running the Microsoft Windows 10 operating system. Only one GPU was used for training. The batch size was set to 8. The IoU metric (Eq. \ref{eq:iou}) was chosen to evaluate the experiments.
192
+
193
+
194
+
195
+
196
+
197
+ \section{Classification Experiments}\label{sec:met_classification}
198
+
199
+ This section presents the details of the 10 classification experiments conducted in this study. Table \ref{tab:classification_exp_sum} shows a summary of these experiments. Note that the experiments will be referred to by the aliases defined in Table \ref{tab:classification_exp_sum} in the remainder of the document.
200
+
201
+ \afterpage{%
202
+ \clearpage% Flush earlier floats (otherwise order might not be correct)
203
+ \thispagestyle{empty}% empty page style (?)
204
+ \begin{landscape}% Landscape page
205
+ \input{Cap3/table-classification-experiments}
206
+ \end{landscape}
207
+ \clearpage% Flush page
208
+ }
209
+
210
+ In the first 4 experiments (CEXP1, CEXP2, CEXP3, CEXP4), the objective was to verify the impact of different layer configurations on grapevine species identification.
211
+
212
+ The dataset used in these experiments was CDS2. The Xception model was chosen because of its excellent performance on ImageNet, combining the Inception module with residual connections. The four models were trained using Focal Loss, because CDS2 is unbalanced. The implementation of \cite{Zhang} was used with $\alpha = 2.0$ and $\gamma = 4.0$ (the default values in this implementation).
213
+
214
+ The approach described by \cite{chollet2017deep} was employed for the fine-tuning. The ImageNet weights were used, replacing the classifier at the top of the network. During the training, the weights of the convolutional part were first frozen and the classifier was trained for 100 epochs, using an SGD optimizer and a step-decay adaptive learning rate starting at 0.1. Equation \ref{eq:step_decay} expresses the step-decay adaptive learning rate mathematically, where $LRInitial$ is the initial learning rate, $DropRate$ is given by Equation \ref{eq:DropRate}, $flatten\_factor$ is the constant $LRInitial^{2.25}$, used to conduct a soft LR plateauing \citep{ADAO2019}, and $epochs\_drop$ is the number of epochs after which the LR has its value decreased, defined as 5. A sketch of this schedule is given after the equations. It is important to highlight that initial experiments were conducted using the Adam and RMSProp optimizers; however, in both cases the training did not converge.
215
+
216
+
217
+
218
+ \begin{equation}\label{eq:step_decay}
219
+ LR = LRInitial \cdot \mathrm{DropRate}^{\left\lfloor \frac{epoch}{epochs\_drop} \right\rfloor}
220
+ \end{equation}
221
+
222
+ \begin{equation}\label{eq:DropRate}
223
+ DropRate = \mathrm{LRInitial}^{\frac{flatten\_factor}{epochs\_drop}}
224
+ \end{equation}
225
+
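+
+ The sketch below implements Equations \ref{eq:step_decay} and \ref{eq:DropRate} as a schedule function, usable e.g. with a Keras LearningRateScheduler callback:
+
+ \begin{verbatim}
+ import math
+
+ LR_INITIAL = 0.1                      # initial LR of the first step
+ EPOCHS_DROP = 5                       # epochs between LR drops
+ FLATTEN_FACTOR = LR_INITIAL ** 2.25   # soft-plateauing constant
+
+ def step_decay(epoch):
+     drop_rate = LR_INITIAL ** (FLATTEN_FACTOR / EPOCHS_DROP)
+     return LR_INITIAL * drop_rate ** math.floor(epoch / EPOCHS_DROP)
+ \end{verbatim}
+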
226
+
227
+ Then, depending on the experiment, the weights of specific convolutional blocks were unfrozen and the model was retrained. The hyper-parameters and equations are the same as in the previous step, except for the initial learning rate, which was set to 0.0001. This optimizer and LR configuration was employed in all fine-tuned classification experiments described in this work.
228
+
229
+
230
+ Table \ref{tab:fine-tuning-experiments} shows the model blocks trained in this step for each experiment. In CEXP1 no layer was trained, which means that the model acted as a feature extractor. In CEXP2 all the convolutional blocks were trained. In CEXP3 the model's last two blocks were trained, and in CEXP4 the model's last six blocks were trained. The last two configurations were defined considering that the model's top layers are sensitive to semantics, while the more intermediate layers are specific to low-level features \citep{Zheng2016}.
231
+
232
+ \begin{table}[htp!]
233
+ \begin{tabular}{@{}ll@{}}
234
+ \toprule
235
+ Experiment & \begin{tabular}[c]{@{}l@{}}Trained Blocks in\\ Fine-Tuning\end{tabular} \\ \midrule
236
+ CEXP1 & - \\
237
+ CEXP2 & All \\
238
+ CEXP3 & 13, 14 \\
239
+ CEXP4 & 9, 10, 11, 12, 13, 14 \\ \bottomrule
240
+ \end{tabular}\caption{Trained blocks in the Xception model in the first four experiments conducted in this study.}\label{tab:fine-tuning-experiments}
241
+ \end{table}
242
+
243
+ Global Average Pooling was applied to the output of the convolutional part of the model, as in the standard Xception model.
244
+
245
+ The size of the models' input was experimentally defined as 300x300 pixels. The default Xception input size (299x299 pixels) was tried in initial tests; however, the model with an input size of 300x300 pixels achieved better performance.
246
+
247
+ As a classifier, a dense layer with 40 neurons was chosen, followed by a ReLU activation and a Dropout of 25\%; the output was defined as a dense layer with 12 neurons followed by a Softmax activation function.
248
+
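+
+ A minimal sketch of this architecture in Keras; the layer sizes follow the text, while the build itself is illustrative:
+
+ \begin{verbatim}
+ import tensorflow as tf
+
+ base = tf.keras.applications.Xception(weights='imagenet',
+                                       include_top=False,
+                                       input_shape=(300, 300, 3))
+ base.trainable = False  # frozen while the classifier is trained
+
+ x = tf.keras.layers.GlobalAveragePooling2D()(base.output)
+ x = tf.keras.layers.Dense(40, activation='relu')(x)
+ x = tf.keras.layers.Dropout(0.25)(x)
+ outputs = tf.keras.layers.Dense(12, activation='softmax')(x)
+ model = tf.keras.Model(base.input, outputs)
+ \end{verbatim}
+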
249
+ Keras with TensorFlow as backend was used to train the models, running on Google Colaboratory. A batch size of 12 was employed because of the resources of the provided virtual machines.
250
+
251
+
252
+ To evaluate these models, the F1 score and accuracy metrics were used, together with Grad-CAM as the XAI approach. Accuracy measures the percentage of correct predictions among all the model's classification results. The F1 score is the harmonic mean of the \textit{Precision} and \textit{Recall} metrics. Precision denotes the number of true positives divided by the total number of positive predictions, while recall denotes the number of true positives divided by the number of positive class values in the dataset \citep{JasonBrownlee2014}. The equations for Accuracy, Precision, Recall and F1 score can be seen, respectively, in Equations \ref{eq:accuracy}, \ref{eq:precision}, \ref{eq:recall} and \ref{eq:f1_score}, where $TP$ represents the True Positives, $FP$ the False Positives, $TN$ the True Negatives and $FN$ the False Negatives. In practice, these metrics can be computed as sketched after the equations.
253
+
254
+ \begin{equation}\label{eq:accuracy}
255
+ Accuracy = \frac{TP + TN}{TP + TN + FP + FN}
256
+ \end{equation}
257
+
258
+ \begin{equation}\label{eq:precision}
259
+ Precision = \frac{TP}{TP + FP}
260
+ \end{equation}
261
+
262
+ \begin{equation}\label{eq:recall}
263
+ Recall = \frac{TP}{TP + FN}
264
+ \end{equation}
265
+
266
+ \begin{equation}\label{eq:f1_score}
267
+ F1 = \frac{2 \cdot Precision \cdot Recall}{Precision + Recall}
268
+ \end{equation}
269
+
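+
+ A minimal sketch of computing these metrics with scikit-learn; the label vectors and the macro averaging mode are illustrative assumptions:
+
+ \begin{verbatim}
+ from sklearn.metrics import accuracy_score, f1_score
+
+ y_true = [0, 1, 2, 2, 1, 0]   # illustrative labels
+ y_pred = [0, 1, 2, 1, 1, 0]
+ acc = accuracy_score(y_true, y_pred)
+ f1 = f1_score(y_true, y_pred, average='macro')
+ \end{verbatim}
+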
270
+ Grad-CAM allowed visualizing the impact of the fine-tuning on the pixels' contribution to the classification at a given layer. In order to visualize only pixels with relevant contributions to the class classification, a threshold of 10\% of the greatest contribution in a Grad-CAM heatmap was applied. This means that, in the analysis, only pixels with a contribution greater than 10\% of the greatest contribution in a Grad-CAM heatmap were considered. This value was defined empirically by observing some results of the experiments. The model's last convolutional layer was chosen as the target, on account of it extracting high-level features. The CDS2's test subset was used in the analysis.
271
+
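+
+ A minimal sketch of this thresholding step, assuming the heatmap is available as a NumPy array:
+
+ \begin{verbatim}
+ import numpy as np
+
+ def relevant_pixels(heatmap, threshold=0.10):
+     # keep only pixels whose contribution exceeds 10% of the
+     # heatmap's greatest contribution
+     heatmap = np.asarray(heatmap, dtype=float)
+     return heatmap > threshold * heatmap.max()
+ \end{verbatim}
+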
272
+ In CEXP5, the capacity of the Xception model to learn to classify grapevines using images from different acquisition devices was explored. While in CEXP1 the CDS2 was used to train the model, in CEXP5 the CDS5 was the dataset explored. Note that CDS5 is composed of images acquired with different devices, since it is a fusion of 3 different datasets. It is important to highlight that this model was trained to identify 6 grapevine species instead of 12.
273
+
274
+ The classification part was composed of two dense layers with 512 neurons and ReLU activation, with a dropout of 0.25 between them. A Global Average Pooling connected the Xception's convolutional part to the classification part. The training configurations and approaches were the same as in CEXP1, except for the loss, which in this experiment was the Cross Entropy Loss.
275
+
276
+ The experiment was executed on a computer with an Intel Xeon E5-2680 CPU, 2 NVIDIA Quadro M4000 GPUs and 128 GB of RAM, running the Microsoft Windows 10 operating system. Only one GPU was used for training. The batch size was set to 16. This configuration was used to execute all the remaining experiments.
277
+
278
+ In CEXP6, the objective was to investigate the use of the Focal Loss instead of the Cross Entropy Loss in the fine-tuned Xception. CDS5 has an imbalance between classes when compared with CDS2, making it interesting to investigate the impact of using (or not) the Focal Loss to improve grapevine species identification.
279
+
280
+ Differently from CEXP1-CEXP4, in CEXP6 the implementation of the Focal Loss by \cite{Griffo2017} was used. The change was due to the difference between the TensorFlow version used in CEXP1-CEXP4 and the one used in the rest of the experiments. In addition, the $\gamma$ parameter was set to 0.25 and $\alpha$ was kept at 2.0 (the default values in this implementation). The remaining training hyper-parameters and configurations were the same as in CEXP5.
281
+
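+
+ A generic sketch of a categorical Focal Loss in TensorFlow is shown below; this is not the exact \cite{Griffo2017} code, and the parameter values follow the text rather than their conventional roles:
+
+ \begin{verbatim}
+ import tensorflow as tf
+
+ def focal_loss(alpha=2.0, gamma=0.25):
+     def loss(y_true, y_pred):
+         # clip to avoid log(0), then down-weight easy examples
+         y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
+         cross_entropy = -y_true * tf.math.log(y_pred)
+         weight = alpha * tf.pow(1.0 - y_pred, gamma)
+         return tf.reduce_sum(weight * cross_entropy, axis=-1)
+     return loss
+ \end{verbatim}
+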
282
+ CEXP7 was made aiming to investigate the impact of using a DL segmentation model as a pre-processing technique for the classification. The model resulting from SEXP1 was used to segment CDS5, separating the grapevine regions from the background and setting the background pixels to black (e.g. Fig. \ref{fig:segmented-images}; a sketch of this masking step is given after the figure). Then, the Xception model was trained using the segmented CDS5, considering that the background can lead the model to error, since it contains no information relevant to grapevine species classification \citep{Xiong2020}. The remaining training hyper-parameters and configurations were the same as in CEXP6.
283
+
284
+ \begin{figure}[htb!]
285
+ \centering
286
+ \includegraphics[width=0.9\textwidth]{Cap3/segmented-images.jpg}
287
+ \caption{Images with background pixels set to black.}
288
+ \label{fig:segmented-images}
289
+ \end{figure}
290
+
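+
+ A minimal sketch of this background-removal step, assuming a binary mask in which background pixels are 0 (the function name is hypothetical):
+
+ \begin{verbatim}
+ import numpy as np
+
+ def remove_background(image, mask):
+     # keep grapevine pixels and set background pixels to black;
+     # image is HxWx3, mask is HxW
+     return np.asarray(image) * (np.asarray(mask)[..., None] > 0)
+ \end{verbatim}
+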
291
+ The experiments CEXP8, CEXP9 and CEXP10 were made aiming to investigate the performance of other pre-trained models in the grapevine species classification. The only difference between each of them and CEXP6 is the pre-trained model used; the remaining training hyper-parameters and configurations were the same as in CEXP6.
292
+
293
+ In CEXP8 the MobileNetV2 is used, because of its small size and inference time, which allow this model to be employed in offline applications on handheld devices. In CEXP9 the ResNet-101 is used; this model is deeper when compared with Xception, MobileNetV2 and EfficientNetB3, which can lead to better feature extraction. In CEXP10 the EfficientNetB3 is used; like the MobileNetV2, this model can easily be used in offline applications on handheld devices.
294
+
295
+ To evaluate the experiments CEXP6-CEXP10, the F1 score and accuracy metrics were used, together with Grad-CAM, Grad-CAM++ and LIME as XAI approaches. As in the previous experiments, a threshold of 10\% of the greatest contribution was applied to the Grad-CAM and Grad-CAM++ heatmaps. The implementation of Grad-CAM++ by \cite{Woof2019} was used. The XDS was the dataset used for the analysis with the XAI approaches.
296
+
297
+
298
+
299
+ The implementation of LIME by \cite{Ribeiro2016} was used. Four parameters were defined for each explainer, aiming to reduce LIME's instability:
300
+
301
+ \begin{enumerate}
302
+ \item \textit{random\_seed}: the seed for the segmentation algorithm, responsible for generating the image regions that will be perturbed. It was set to 101.
303
+ \item \textit{num\_samples}: the size of the neighborhood around a region selected by LIME, used to learn the linear model employed in the explanations. It was set to 1000 (the default value).
304
+ \item \textit{num\_features}: the maximum number of regions present in an explanation. It was set to 100000 (the default value).
305
+ \item \textit{distance\_metric}: the distance used to calculate the weights. It was set to the Cosine Distance (the default value), since the vectors that represent the images have a fixed size.
306
+ \end{enumerate}
307
+
308
+ On account of the chosen \textit{num\_features}, the selection of the quantity of features used in the experiments was based on the highest product of the absolute weight and the original data point during learning. This configuration was set through the '\textit{auto}' option of the \textit{feature\_selection} parameter of the \textbf{LimeImageExplainer} object. The regressor used was Ridge Regression, LIME's default for image explanations. The remaining parameters were kept at their default values.
309
+
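+
+ A minimal sketch of this configuration, where \texttt{image} and \texttt{model} are hypothetical (an input array and a trained classifier):
+
+ \begin{verbatim}
+ from lime import lime_image
+
+ explainer = lime_image.LimeImageExplainer(feature_selection='auto')
+ explanation = explainer.explain_instance(image, model.predict,
+                                          random_seed=101,
+                                          num_samples=1000,
+                                          num_features=100000,
+                                          distance_metric='cosine')
+ \end{verbatim}
+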
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
+
321
+
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
Dissertação/Cap3/cds5-image-distribution.png ADDED
Dissertação/Cap3/comparacao_tempo.jpg ADDED
Dissertação/Cap3/distribuition.png ADDED
Dissertação/Cap3/distribution-ds1.png ADDED
Dissertação/Cap3/distribution-ds3.png ADDED
Dissertação/Cap3/ds3-examples.jpg ADDED
Dissertação/Cap3/example_ds12.jpg ADDED
Dissertação/Cap3/fluxograma geral.jpg ADDED
Dissertação/Cap3/mascara.jpg ADDED
Dissertação/Cap3/segmentation-example.jpg ADDED
Dissertação/Cap3/segmented-images.jpg ADDED