5using System.Collections.Generic;
8using System.Threading.Tasks;
18 List<int> m_rgGpuID =
new List<int>();
20 int m_nAccumBatchSize;
21 int m_nBatchSizePerDevice;
24 double m_dfLrMult = 1;
25 bool m_bUseBatchNorm =
false;
26 int m_nNumTestImage = 4952;
27 int m_nTestBatchSize = 8;
29 string m_strTrainDataSource =
"VOC0712.training";
30 string m_strTestDataSource =
"VOC0712.testing";
31 string m_strNameSizeFile =
"data\\ssd\\VOC0712\\test_name_size.txt";
32 string m_strLabelMapFile =
"data\\ssd\\VOC0712\\labelmap_voc.prototxt";
33 string m_strPreTrainModel =
"models\\VGGNet\\VGG_ILSVRC_16_layers_fc_reduced.caffemodel";
40 int m_nResizeWidth = 300;
41 int m_nResizeHeight = 300;
42 List<BatchSampler> m_rgBatchSampler =
new List<BatchSampler>();
49 List<float> m_rgPriorVariance;
52 int m_nNumClasses = 21;
53 bool m_bShareLocation =
true;
54 int m_nBackgroundLabelId = 0;
55 double m_dfNegPosRatio = 3.0;
57 List<MultiBoxHeadInfo> m_rgMultiBoxInfo;
71 : base(strBaseDirectory, net)
76 m_rgGpuID =
new List<int>(rgGpuId);
78 m_strJob =
"SSD_" + m_nResizeWidth.ToString() +
"x" + m_nResizeHeight.ToString();
80 m_strModel =
"VGG_VOC0712_" + m_strJob;
82 m_bUseBatchNorm = bUseBatchNorm;
83 m_normalizationMode = normMode;
85 m_nBatchSize = nBatchSize;
86 m_nAccumBatchSize = nAccumBatchSize;
87 m_nIterSize = m_nAccumBatchSize / m_nBatchSize;
89 m_nBatchSizePerDevice = (m_rgGpuID.Count == 1) ? m_nBatchSize : m_nBatchSize / m_rgGpuID.Count;
90 m_nIterSize = (
int)Math.Ceiling((
float)m_nAccumBatchSize / (m_nBatchSizePerDevice * m_rgGpuID.Count));
91 m_nGpuID = m_rgGpuID[0];
94 m_dfLocWeight = (m_dfNegPosRatio + 1.0) / 4.0;
95 m_dfBaseLr = (m_bUseBatchNorm) ? 0.0004 : 0.00004;
97 switch (m_normalizationMode)
100 m_dfBaseLr /= m_nBatchSizePerDevice;
104 m_dfBaseLr *= 25.0 / m_dfLocWeight;
115 m_nTestIter = (int)Math.Ceiling((
float)m_nNumTestImage / (float)m_nTestBatchSize);
121 m_transformTrain.
mirror =
true;
171 m_rgBatchSampler.Add(sampler);
173 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.1f);
174 m_rgBatchSampler.Add(sampler);
176 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.3f);
177 m_rgBatchSampler.Add(sampler);
179 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.5f);
180 m_rgBatchSampler.Add(sampler);
182 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.7f);
183 m_rgBatchSampler.Add(sampler);
185 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f, 0.9f);
186 m_rgBatchSampler.Add(sampler);
188 sampler = createSampler(50, 1, 0.3f, 1.0f, 0.5f, 2.0f,
null, 1.0f);
189 m_rgBatchSampler.Add(sampler);
213 m_rgPriorVariance =
new List<float>() { 0.1f, 0.1f, 0.2f, 0.2f };
215 m_rgPriorVariance =
new List<float>() { 0.1f };
256 List<string> rgstrMboxSourceLayers =
new List<string>() {
"conv4_3",
"fc7",
"conv6_2",
"conv7_2",
"conv8_2",
"conv9_2" };
257 List<double> rgAspectWid =
new List<double>() { 2, 2, 2, 2, 2, 2 };
258 List<double> rgAspectHt =
new List<double>() { 2, 3, 3, 3, 2, 2 };
260 List<double> rgNormalization =
new List<double>() { 20, -1, -1, -1, -1, -1 };
261 List<double> rgStepsW =
new List<double>() { 8, 16, 32, 64, 100, 300 };
262 List<double> rgStepsH =
new List<double>() { 8, 16, 32, 64, 100, 300 };
265 double dfMinRatio = 20;
266 double dfMaxRatio = 90;
267 double dfRatioStep = (int)Math.Floor((dfMaxRatio - dfMinRatio) / (rgstrMboxSourceLayers.Count - 2));
268 List<double> rgMinSizes =
new List<double>();
269 List<double> rgMaxSizes =
new List<double>();
271 for (
double dfRatio = dfMinRatio; dfRatio < dfMaxRatio + 1; dfRatio += dfRatioStep)
273 rgMinSizes.Add(nMinDim * dfRatio / 100.0);
274 rgMaxSizes.Add(nMinDim * (dfRatio + dfRatioStep) / 100.0);
277 rgMinSizes.Insert(0, nMinDim * 10 / 100.0);
278 rgMaxSizes.Insert(0, nMinDim * 20 / 100.0);
280 m_rgMultiBoxInfo =
new List<MultiBoxHeadInfo>();
282 for (
int i = 0; i < rgstrMboxSourceLayers.Count; i++)
284 string strSrc = rgstrMboxSourceLayers[i];
285 double dfMinSize = rgMinSizes[i];
286 double dfMaxSize = rgMaxSizes[i];
287 double dfStepW = rgStepsW[i];
288 double dfStepH = rgStepsH[i];
289 double dfAspectW = rgAspectWid[i];
290 double dfAspectH = rgAspectHt[i];
291 double dfNorm = rgNormalization[i];
293 m_rgMultiBoxInfo.Add(
new MultiBoxHeadInfo(strSrc, dfMinSize, dfMaxSize, dfStepW, dfStepH, dfAspectW, dfAspectH, dfNorm,
null));
297 private BatchSampler createSampler(
int nMaxTrials,
int nMaxSample,
float fMinScale = 1.0f,
float fMaxScale = 1.0f,
float fMinAspectRatio = 1.0f,
float fMaxAspectRatio = 1.0f,
float? fMinJaccardOverlap =
null,
float? fMaxJaccardOverlap =
null)
307 if (fMinJaccardOverlap.HasValue)
310 if (fMaxJaccardOverlap.HasValue)
357 string strLabelMapFile =
getFileName(m_strLabelMapFile,
null);
370 List<LayerParameter> rgMboxLayers =
createMultiBoxHead(data, m_nNumClasses, m_rgMultiBoxInfo, m_rgPriorVariance,
false, m_bUseBatchNorm, m_dfLrMult,
true, 0, 0, m_bShareLocation, m_bFlip, m_bClip, 0.5, 3, 1);
375 string strName =
"mbox_loss";
377 mbox_loss.
name = strName;
381 mbox_loss.
propagate_down =
new List<bool>() {
true,
true,
false,
false };
382 mbox_loss.
top.Add(mbox_loss.
name);
391 string strConfName =
"mbox_conf";
396 string strReshapeName = strConfName +
"_reshape";
398 reshape.
name = strReshapeName;
408 string strSoftmaxName = strConfName +
"_softmax";
410 softmax.
name = strSoftmaxName;
420 string strFlattentName = strConfName +
"_flatten";
422 flatten.
name = strFlattentName;
432 rgMboxLayers[1] = lastLayer;
436 string strSigmoidName = strConfName +
"_sigmoid";
438 sigmoid.
name = strSigmoidName;
447 rgMboxLayers[1] = lastLayer;
451 detectionOut.
name =
"detection_output";
452 detectionOut.
top.Add(detectionOut.
name);
464 detectionEval.
name =
"detection_eval";
465 detectionEval.
top.Add(detectionEval.
name);
490 bool bUseRelU =
true;
496 strOutLayer =
"conv6_1";
497 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 256, 1, 0, 1, dfLrMult);
498 strFromLayer = strOutLayer;
500 strOutLayer =
"conv6_2";
501 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 512, 3, 1, 2, dfLrMult);
502 strFromLayer = strOutLayer;
505 strOutLayer =
"conv7_1";
506 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 128, 1, 0, 1, dfLrMult);
507 strFromLayer = strOutLayer;
509 strOutLayer =
"conv7_2";
510 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 256, 3, 1, 2, dfLrMult);
511 strFromLayer = strOutLayer;
514 strOutLayer =
"conv8_1";
515 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 128, 1, 0, 1, dfLrMult);
516 strFromLayer = strOutLayer;
518 strOutLayer =
"conv8_2";
519 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 256, 3, 0, 1, dfLrMult);
520 strFromLayer = strOutLayer;
523 strOutLayer =
"conv9_1";
524 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 128, 1, 0, 1, dfLrMult);
525 strFromLayer = strOutLayer;
527 strOutLayer =
"conv9_2";
528 lastLayer =
addConvBNLayer(strFromLayer, strOutLayer, bUseBatchNorm, bUseRelU, 256, 3, 0, 1, dfLrMult);
529 strFromLayer = strOutLayer;
The SimpleDatum class holds a data input within host memory.
ANNOTATION_TYPE
Specifies the annotation type when using annotations.
The ModelBuilder is an abstract class that is overridden by a base class used to programmatically build ...
LayerParameter addVGGNetBody(LayerParameter lastLayer, bool bNeedFc=true, bool bFullConv=true, bool bReduced=true, bool bDilated=true, bool bNoPool=false, bool bDropout=false, List< string > rgstrFreezeLayers=null, bool bDilatePool4=false)
Adds the full VGG body to the network, connecting it to the 'lastLayer'.
SolverParameter m_solver
Specifies the base solver to use.
LayerParameter addAnnotatedDataLayer(string strSource, Phase phase, int nBatchSize=32, bool bOutputLabel=true, string strLabelMapFile="", SimpleDatum.ANNOTATION_TYPE anno_type=SimpleDatum.ANNOTATION_TYPE.NONE, TransformationParameter transform=null, List< BatchSampler > rgSampler=null)
Add the Annotated Data layer.
NetParameter m_net
Specifies the base net to be altered.
List< LayerParameter > createMultiBoxHead(LayerParameter data, int nNumClasses, List< MultiBoxHeadInfo > rgInfo, List< float > rgPriorVariance, bool bUseObjectness=false, bool bUseBatchNorm=true, double dfLrMult=1.0, bool useScale=true, int nImageHt=0, int nImageWd=0, bool bShareLocation=true, bool bFlip=true, bool bClip=true, double dfOffset=0.5, int nKernelSize=1, int nPad=0, string strConfPostfix="", string strLocPostfix="")
Create the multi-box head layers.
LayerParameter addConvBNLayer(string strInputLayer, string strOutputLayer, bool bUseBatchNorm, bool bUseRelU, int nNumOutput, int nKernelSize, int nPad, int nStride, double dfLrMult=1.0, int nDilation=1, SCALE_BIAS useScale=SCALE_BIAS.SCALE, string strConvPrefix="", string strConvPostfix="", string strBnPrefix="", string strBnPostfix="_bn", string strScalePrefix="", string strScalePostFix="_scale", string strBiasPrefix="", string strBiasPostfix="_bias", bool bNamedParams=false, string strLayerPostfix="", Phase phaseExclude=Phase.NONE)
Add convolution, batch-norm layers.
string m_strBaseDir
Specifies the base directory that contains the data and models.
NetParameter createNet(string strName)
Create the base network parameter for the model and set its name to the 'm_strModel' name.
LayerParameter findLayer(string strName)
Find a layer with a given name.
LayerParameter connectAndAddLayer(string fromLayer, LayerParameter toLayer, string fromLayer2=null)
Connect the from layer to the 'to' layer.
string getFileName(string strFile, string strSubDir)
Returns the full path of the filename using the base directory originally set when creating the ModelBu...
The MultiBoxHeadInfo contains information used to build the multi-box head of layers.
The SsdPascalModelBuilder adds the extra layers to a 'base' model for the Pascal model used with SSD.
override SolverParameter CreateSolver()
Create the base solver to use.
override NetParameter CreateDeployModel()
Create the testing SSD model for the pascal dataset.
SsdPascalModelBuilder(string strBaseDirectory, int nBatchSize=32, int nAccumBatchSize=32, List< int > rgGpuId=null, bool bUseBatchNorm=false, LossParameter.NormalizationMode normMode=LossParameter.NormalizationMode.VALID, NetParameter net=null)
The constructor.
override LayerParameter addExtraLayers(bool bUseBatchNorm=true, double dfLrMult=1)
Add extra layers (for SSD with the Pascal dataset) on top of a 'base' network (e.g....
override NetParameter CreateModel(bool bDeploy=false)
Create the training SSD model for the pascal dataset.
Specifies the shape of a Blob.
int axis
Specifies the first axis to flatten: all preceding axes are retained in the output....
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
DetectionOutputParameter detection_output_param
Returns the parameter set when initialized with LayerType.DETECTION_OUTPUT
MultiBoxLossParameter multiboxloss_param
Returns the parameter set when initializing with LayerType.MULTIBOX_LOSS
SoftmaxParameter softmax_param
Returns the parameter set when initialized with LayerType.SOFTMAX
List< bool > propagate_down
Specifies whether or not the LayerParameter (or portions of) should be backpropagated.
List< NetStateRule > include
Specifies the NetStateRule's for which this LayerParameter should be included.
List< string > top
Specifies the active top connections (in the bottom, out the top)
ReshapeParameter reshape_param
Returns the parameter set when initialized with LayerType.RESHAPE
DetectionEvaluateParameter detection_evaluate_param
Returns the parameter set when initialized with LayerType.DETECTION_EVALUATE
FlattenParameter flatten_param
Returns the parameter set when initialized with LayerType.FLATTEN
List< string > bottom
Specifies the active bottom connections (in the bottom, out the top).
LayerType
Specifies the layer type.
LossParameter loss_param
Returns the parameter set when initialized with LayerType.LOSS
Stores the parameters used by loss layers.
NormalizationMode
How to normalize the loss for loss layers that aggregate across batches, spatial dimensions,...
NormalizationMode? normalization
Specifies the normalization mode (default = VALID).
Specifies the parameters used to create a Net
List< LayerParameter > layer
The layers that make up the net. Each of their configurations, including connectivity and behavior,...
Specifies a NetStateRule used to determine whether a Net falls within a given include or exclude patt...
BlobShape shape
Specifies the output dimensions.
int axis
The axis along which to perform the softmax – may be negative to index from the end (e....
The SolverParameter is a parameter for the solver, specifying the train and test networks.
int max_iter
The maximum number of iterations.
List< int > test_iter
The number of iterations for each test.
bool debug_info
If true, print information about the state of the net that may help with debugging learning problems.
LearningRatePolicyType
Defines the learning rate policy to use.
SolverType
Defines the type of solver.
LearningRatePolicyType LearningRatePolicy
The learning rate decay policy.
int device_id
The device id that will be used when run on the GPU.
ApVersion ap_version
Specifies the AP Version to use for average precision when using Single-Shot Detection (SSD) - (defau...
int average_loss
Display the loss averaged over the last average_loss iterations.
int test_interval
The number of iterations between two testing phases.
int iter_size
Accumulate gradients over 'iter_size' x 'batch_size' instances.
EvaluationType
Defines the evaluation method used in the SSD algorithm.
double gamma
Specifies the 'gamma' parameter to compute the 'step', 'exp', 'inv', and 'sigmoid' learning policy (d...
bool snapshot_after_train
If false, don't save a snapshot after training finishes.
EvaluationType eval_type
Specifies the evaluation type to use when using Single-Shot Detection (SSD) - (default = NONE,...
bool test_initialization
If true, run an initial test pass before the first iteration, ensuring memory availability and printi...
int display
The number of iterations between displaying info. If display = 0, no info will be displayed.
double weight_decay
Specifies the weight decay (default = 0.0005).
List< int > stepvalue
The step values for learning rate policy 'multistep'.
double momentum
Specifies the momentum value - used by all solvers EXCEPT the 'AdaGrad' and 'RMSProp' solvers....
int snapshot
Specifies the snapshot interval.
double base_lr
The base learning rate (default = 0.01).
SolverType type
Specifies the solver type.
Specifies how to sample a batch of bboxes with provided constraints in SSD.
SamplerConstraint sample_constraint
Get/set the sample constraint.
Sampler sampler
Specifies the constraints for sampling the bbox
uint max_trials
Maximum number of trials for sampling to avoid an infinite loop.
uint max_sample
If provided (greater than zero), break when a certain number of samples is found satisfying the sample con...
Specifies the parameters for the DetectionEvaluateLayer.
uint background_label_id
Specifies the background class.
bool evaulte_difficult_gt
Specifies whether or not to consider the ground truth for evaluation.
string name_size_file
Specifies the file which contains a list of names and sizes in the same order of the input database....
uint num_classes
Specifies the number of classes that are actually predicted - required!
float overlap_threshold
Specifies the threshold for deciding true/false positive.
Specifies the parameters for the DetectionOutputLayer.
PriorBoxParameter.CodeType code_type
Specifies the coding method for the bbox.
float? confidence_threshold
Specifies the threshold for deciding which detections to consider - only those which are larger than ...
NonMaximumSuppressionParameter nms_param
Specifies the parameter used for non maximum suppression.
bool share_location
Specifies whether or not the bounding box is shared among different classes (default = true).
uint num_classes
Specifies the number of classes that are actually predicted - required!
SaveOutputParameter save_output_param
Specifies the parameter used for saving the detection results.
int background_label_id
Specifies the background class.
int keep_top_k
Specifies the number of total bboxes to be kept per image after nms step, -1 means keeping all bboxes...
Specifies the parameters for the DistortionParameter used with SSD.
float contrast_lower
Get/set lower bound for random contrast factor (default = 0.5).
float random_order_prob
Get/set the probability of randomly ordering the image channels (default = 0).
float brightness_delta
Get/set amount to add to the pixel values within [-delta,delta] (default = 0)
float saturation_prob
Get/set probability of adjusting the saturation (default = 0).
float saturation_lower
Get/set lower bound for random saturation factor (default = 0.5).
float saturation_upper
Get/set upper bound for random saturation factor (default = 1.5).
float brightness_prob
Get/set probability of adjusting the brightness (default = 0).
float contrast_prob
Get/set probability of adjusting the contrast (default = 0).
float contrast_upper
Get/set upper bound for random contrast factor (default = 1.5).
Specifies the parameters for the EmitConstraint used with SSD.
EmitType emit_type
Get/set the emit type.
EmitType
Specifies the emit type.
Specifies the parameters for the ExpansionParameter used with SSD.
float max_expand_ratio
Get/set the ratio to expand the image.
float prob
Get/set probability of using this expansion policy.
Specifies the parameters for the MultiBoxLossParameter.
float loc_weight
Get/set the weight for the localization loss (default = 1.0).
float overlap_threshold
Get/set the overlap threshold (default = 0.5).
bool use_difficult_gt
Get/set whether or not to consider the difficult ground truth (default = true).
MatchType
Defines the matching method used during training.
PriorBoxParameter.CodeType code_type
Get/set the coding method for the bounding box.
float neg_overlap
Get/set the negative overlap upperbound for the unmatched predictions (default = 0....
LocLossType
Defines the localization loss types.
float neg_pos_ratio
Get/set the negative/positive ratio (default = 3.0).
bool share_location
Get/sets whether or not the bounding box is shared among different classes (default = true).
MiningType
Defines the mining type used during training.
LocLossType loc_loss_type
Get/set the localization loss type (default = SMOOTH_L1).
uint background_label_id
Get/set the background label id.
ConfLossType
Defines the confidence loss types.
bool ignore_cross_boundary_bbox
Get/set whether or not to ignore cross boundary bbox during matching (default = false)....
ConfLossType conf_loss_type
Get/set the confidence loss type (default = SOFTMAX).
bool use_prior_for_matching
Get/set whether or not to use prior for matching.
uint num_classes
Get/set the number of classes to be predicted - required!
MiningType mining_type
Get/set the mining type used during training (default = MAX_NEGATIVE).
MatchType match_type
Get/set the matching method used during training (default = PER_PREDICTION).
Specifies the parameters for the NonMaximumSuppressionParameter used with SSD.
int? top_k
Get/set the maximum number of results kept.
float nms_threshold
Get/set the threshold to be used in nms.
Specifies the parameters for the PriorBoxParameter.
CodeType
Defines the encode/decode type.
Specifies the parameters for the ResizeParameter for use with SSD.
InterpMode
Defines the interpolation mode.
uint height
Get/set the resizing height.
List< InterpMode > interp_mode
Get/set the interp mode which is repeated once for all channels, or provided once per channel.
uint width
Get/set the resizing width.
ResizeMode
Defines the resizing mode.
ResizeMode resize_mode
Get/set the resizing mode.
float prob
Get/set probability of using this resize policy.
float? max_jaccard_overlap
Get/set the maximum Jaccard overlap between sampled bbox and all boxes in AnnotationGroup.
float? min_jaccard_overlap
Get/set the minimum Jaccard overlap between sampled bbox and all boxes in AnnotationGroup.
float max_scale
Get/set the maximum scale of the sampled bbox.
float max_aspect_ratio
Get/set the maximum aspect ratio of the sampled bbox.
float min_scale
Get/set the minimum scale of the sampled bbox.
float min_aspect_ratio
Get/set the minimum aspect ratio of the sampled bbox.
Specifies the parameters for the SaveOutputLayer.
string output_directory
Specifies the output directory - if not empty, the results will be saved.
string name_size_file
Optionally, specifies the output name size file.
string label_map_file
Optionally, specifies the output label map file.
string output_name_prefix
Specifies the output name prefix.
uint? num_test_image
Specifies the number of test images.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Phase
Defines the Phase under which to run a Net.
ApVersion
Defines the different way of computing average precision.
The MyCaffe.model namespace contains all classes used to programmatically create new model scripts.
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...