2using System.Collections.Generic;
27 bool m_bShareLocations;
29 int m_nBackgroundLabelId;
31 bool m_bVarianceEncodedInTarget;
33 float m_fConfidenceThreshold;
35 float m_fNmsThreshold;
39 bool m_bNeedSave =
false;
40 string m_strOutputDir;
41 string m_strOutputNamePrefix;
43 Dictionary<int, string> m_rgLabelToName =
new Dictionary<int, string>();
44 Dictionary<int, string> m_rgLabelToDisplayName =
new Dictionary<int, string>();
45 List<string> m_rgstrNames =
new List<string>();
46 List<SizeF> m_rgSizes =
new List<SizeF>();
54 float m_fVisualizeThreshold;
73 m_blobBboxPreds =
new Blob<T>(cuda, log);
75 m_blobBboxPermute =
new Blob<T>(cuda, log);
77 m_blobConfPermute =
new Blob<T>(cuda, log);
89 if (m_bboxUtil !=
null)
95 if (m_transformer !=
null)
110 col.
Add(m_blobBboxPreds);
111 col.
Add(m_blobBboxPermute);
112 col.
Add(m_blobConfPermute);
150 m_nNumLocClasses = (m_bShareLocations) ? 1 : m_nNumClasses;
159 m_log.
CHECK_GE(m_fNmsThreshold, 0,
"The nms_threshold must be non negative.");
167 m_bNeedSave = !
string.IsNullOrEmpty(m_strOutputDir);
168 if (m_bNeedSave && !Directory.Exists(m_strOutputDir))
169 Directory.CreateDirectory(m_strOutputDir);
177 if (!File.Exists(strLabelMapFile))
180 m_log.
WriteLine(
"WARNING: Could not find the label_map_file '" + strLabelMapFile +
"'!");
192 catch (Exception excpt)
194 throw new Exception(
"Failed to read label map file!", excpt);
201 catch (Exception excpt)
203 throw new Exception(
"Failed to convert the label to name!", excpt);
210 catch (Exception excpt)
212 throw new Exception(
"Failed to convert the label to display name!", excpt);
224 if (!File.Exists(strNameSizeFile))
227 m_log.
WriteLine(
"WARNING: Could not find the name_size_file '" + strNameSizeFile +
"'!");
232 using (StreamReader sr =
new StreamReader(strNameSizeFile))
238 string strLine = sr.ReadLine();
239 while (strLine !=
null)
241 string[] rgstr = strLine.Split(
' ');
242 if (rgstr.Length != 3 && rgstr.Length != 4)
243 throw new Exception(
"Invalid name_size_file format, expected 'name' 'height' 'width'");
245 int nNameIdx = (rgstr.Length == 4) ? 1 : 0;
246 strName = rgstr[nNameIdx].Trim(
',');
247 nHeight =
int.Parse(rgstr[nNameIdx + 1].Trim(
','));
248 nWidth =
int.Parse(rgstr[nNameIdx + 2].Trim(
','));
250 m_rgstrNames.Add(strName);
251 m_rgSizes.Add(
new SizeF(nWidth, nHeight));
253 strLine = sr.ReadLine();
260 m_nNumTestImage = m_rgstrNames.Count;
262 m_log.
CHECK_LE(m_nNumTestImage, m_rgstrNames.Count,
"The number of test images cannot exceed the number of names.");
286 if (!m_bShareLocations)
301 m_log.
CHECK_LE(m_nNameCount, m_rgstrNames.Count,
"The name count must be <= the number of names.");
303 if (m_nNameCount % m_nNumTestImage == 0)
308 string strDir = m_strOutputDir;
310 foreach (KeyValuePair<int, string> kv
in m_rgLabelToName)
312 if (kv.Key == m_nBackgroundLabelId)
315 string strFile = strDir.TrimEnd(
'\\') +
"\\" + kv.Value +
".txt";
316 if (File.Exists(strFile))
317 File.Delete(strFile);
323 m_log.
CHECK_EQ(colBottom[0].num, colBottom[1].num,
"The bottom[0] and bottom[1] must have the same 'num'.");
327 if (!m_bShareLocations)
332 m_nNumPriors = colBottom[2].height / 4;
333 m_log.
CHECK_EQ(m_nNumPriors * m_nNumLocClasses * 4, colBottom[0].channels,
"The number of priors must match the number of location predictions (bottom[0]).");
334 m_log.
CHECK_EQ(m_nNumPriors * m_nNumClasses, colBottom[1].channels,
"The number of priors must match the number of confidence predictions (bottom[1]).");
347 private string getFileName(
string strLabel,
string strExt)
349 string strFile = m_strOutputDir.TrimEnd(
'\\');
351 strFile += m_strOutputNamePrefix;
372 float[] rgfLocData =
convertF(colBottom[0].mutable_cpu_data);
373 float[] rgfConfData =
convertF(colBottom[1].mutable_cpu_data);
374 float[] rgfPriorData =
convertF(colBottom[2].mutable_cpu_data);
375 int nNum = colBottom[0].num;
378 List<LabelBBox> rgAllLocPreds = m_bboxUtil.
GetLocPredictions(rgfLocData, nNum, m_nNumPriors, m_nNumLocClasses, m_bShareLocations);
381 List<Dictionary<int, List<float>>> rgAllConfScores = m_bboxUtil.
GetConfidenceScores(rgfConfData, nNum, m_nNumPriors, m_nNumClasses);
385 List<List<float>> rgrgPriorVariances;
386 List<NormalizedBBox> rgPriorBboxes = m_bboxUtil.
GetPrior(rgfPriorData, m_nNumPriors, out rgrgPriorVariances);
389 bool bClipBbox =
false;
390 List<LabelBBox> rgAllDecodeBboxes = m_bboxUtil.
DecodeAll(rgAllLocPreds, rgPriorBboxes, rgrgPriorVariances, nNum, m_bShareLocations, m_nNumLocClasses, m_nBackgroundLabelId, m_codeType, m_bVarianceEncodedInTarget, bClipBbox);
393 List<Dictionary<int, List<int>>> rgAllIndices =
new List<Dictionary<int, List<int>>>();
395 for (
int i=0; i < nNum; i++)
397 LabelBBox decode_bboxes = rgAllDecodeBboxes[i];
398 Dictionary<int, List<float>> rgConfScores = rgAllConfScores[i];
399 Dictionary<int, List<int>> rgIndices =
new Dictionary<int, List<int>>();
402 for (
int c = 0; c < m_nNumClasses; c++)
405 if (c == m_nBackgroundLabelId)
409 if (!rgConfScores.ContainsKey(c))
410 m_log.
FAIL(
"Could not find confidence predictions for label '" + c.ToString() +
"'!");
412 List<float> rgfScores = rgConfScores[c];
413 int nLabel = (m_bShareLocations) ? -1 : c;
416 if (!decode_bboxes.
Contains(nLabel))
417 m_log.
FAIL(
"Could not find location predictions for the label '" + nLabel.ToString() +
"'!");
419 List<NormalizedBBox> rgBboxes = decode_bboxes[nLabel];
421 m_bboxUtil.
ApplyNMSFast(rgBboxes, rgfScores, m_fConfidenceThreshold, m_fNmsThreshold, m_fEta, m_nTopK, out rgIndexes);
422 rgIndices[c] = rgIndexes;
423 nNumDet += rgIndices[c].Count;
426 if (m_nKeepTopK > -1 && nNumDet > m_nKeepTopK)
428 List<Tuple<float, Tuple<int, int>>> rgScoreIndexPairs =
new List<Tuple<float, Tuple<int, int>>>();
430 foreach (KeyValuePair<
int, List<int>> kv
in rgIndices)
433 List<int> rgLabelIndices = kv.Value;
436 if (!rgConfScores.ContainsKey(nLabel))
437 m_log.
FAIL(
"Could not find location predictions for label " + nLabel.ToString() +
"!");
439 List<float> rgScores = rgConfScores[nLabel];
440 for (
int j = 0; j < rgLabelIndices.Count; j++)
442 int nIdx = rgLabelIndices[j];
443 m_log.
CHECK_LT(nIdx, rgScores.Count,
"The current index must be less than the number of scores!");
444 rgScoreIndexPairs.Add(
new Tuple<
float, Tuple<int, int>>(rgScores[nIdx],
new Tuple<int, int>(nLabel, nIdx)));
449 rgScoreIndexPairs = rgScoreIndexPairs.OrderByDescending(p => p.Item1).ToList();
450 if (rgScoreIndexPairs.Count > m_nKeepTopK)
451 rgScoreIndexPairs = rgScoreIndexPairs.Take(m_nKeepTopK).ToList();
454 Dictionary<int, List<int>> rgNewIndices =
new Dictionary<int, List<int>>();
455 for (
int j = 0; j < rgScoreIndexPairs.Count; j++)
457 int nLabel = rgScoreIndexPairs[j].Item2.Item1;
458 int nIdx = rgScoreIndexPairs[j].Item2.Item2;
460 if (!rgNewIndices.ContainsKey(nLabel))
461 rgNewIndices.Add(nLabel,
new List<int>());
463 rgNewIndices[nLabel].Add(nIdx);
466 rgAllIndices.Add(rgNewIndices);
467 nNumKept += m_nKeepTopK;
471 rgAllIndices.Add(rgIndices);
477 rgTopShape.Add(nNumKept);
479 float[] rgfTopData =
null;
484 rgTopShape[2] = nNum;
488 rgfTopData =
convertF(colTop[0].mutable_cpu_data);
492 for (
int i = 0; i < nNum; i++)
494 rgfTopData[nOffset + 0] = i;
501 rgfTopData =
convertF(colTop[0].mutable_cpu_data);
505 string strDir = m_strOutputDir;
507 for (
int i = 0; i < nNum; i++)
509 Dictionary<int, List<float>> rgConfScores = rgAllConfScores[i];
510 LabelBBox decode_bboxes = rgAllDecodeBboxes[i];
512 foreach (KeyValuePair<
int, List<int>> kv
in rgAllIndices[i])
517 if (!rgConfScores.ContainsKey(nLabel))
518 m_log.
FAIL(
"Could not find confidence predictions for label '" + nLabel.ToString() +
"'!");
520 List<float> rgfScores = rgConfScores[nLabel];
521 int nLocLabel = (m_bShareLocations) ? -1 : nLabel;
524 if (!decode_bboxes.Contains(nLocLabel))
525 m_log.
FAIL(
"COuld not find location predictions for label '" + nLabel.ToString() +
"'!");
527 List<NormalizedBBox> rgBboxes = decode_bboxes[nLocLabel];
528 List<int> rgIndices = kv.Value;
532 m_log.
CHECK(m_rgLabelToName.ContainsKey(nLabel),
"The label to name mapping does not contain the label '" + nLabel.ToString() +
"'!");
533 m_log.
CHECK_LT(m_nNameCount, m_rgstrNames.Count,
"The name count must be less than the number of names.");
536 for (
int j = 0; j < rgIndices.Count; j++)
538 int nIdx = rgIndices[j];
539 rgfTopData[nCount * 7 + 0] = i;
540 rgfTopData[nCount * 7 + 1] = nLabel;
541 rgfTopData[nCount * 7 + 2] = rgfScores[nIdx];
544 rgfTopData[nCount * 7 + 3] = bbox.
xmin;
545 rgfTopData[nCount * 7 + 4] = bbox.
ymin;
546 rgfTopData[nCount * 7 + 5] = bbox.
xmax;
547 rgfTopData[nCount * 7 + 6] = bbox.
ymax;
553 float fScore = rgfTopData[nCount * 7 + 2];
554 float fXmin = out_bbox.
xmin;
555 float fYmin = out_bbox.
ymin;
556 float fXmax = out_bbox.
xmax;
557 float fYmax = out_bbox.
ymax;
560 pt_xmin.
Put(
"", Math.Round(fXmin * 100) / 100);
563 pt_ymin.
Put(
"", Math.Round(fYmin * 100) / 100);
566 pt_wd.
Put(
"", Math.Round((fXmax - fXmin) * 100) / 100);
569 pt_ht.
Put(
"", Math.Round((fYmax - fYmin) * 100) / 100);
578 cur_det.
Put(
"image_id", m_rgstrNames[m_nNameCount]);
580 cur_det.
Put(
"category_id", nLabel);
582 cur_det.
Put(
"category_id", m_rgLabelToName[nLabel]);
585 cur_det.
Put(
"score", fScore);
598 if (m_nNameCount % m_nNumTestImage == 0)
602 Dictionary<string, StreamWriter> rgOutFiles =
new Dictionary<string, StreamWriter>();
604 for (
int c = 0; c < m_nNumClasses; c++)
606 if (c == m_nBackgroundLabelId)
609 string strLabelName = m_rgLabelToName[c];
610 string strFile = getFileName(strLabelName,
"txt");
611 rgOutFiles.Add(strLabelName,
new StreamWriter(strFile));
616 string strLabel = pt.
Get(
"category_id").
Value;
617 if (!rgOutFiles.ContainsKey(strLabel))
619 m_log.
WriteLine(
"WARNING! Cannot find '" + strLabel +
"' label in the output files!");
623 string strImageName = pt.
Get(
"image_id").
Value;
624 float fScore = (float)pt.
Get(
"score").
Numeric;
626 List<int> bbox =
new List<int>();
632 string strLine = strImageName;
633 strLine +=
" " + fScore.ToString();
634 strLine +=
" " + bbox[0].ToString() +
" " + bbox[1].ToString();
635 strLine +=
" " + (bbox[0] + bbox[2]).ToString();
636 strLine +=
" " + (bbox[1] + bbox[3]).ToString();
637 rgOutFiles[strLabel].WriteLine(strLine);
640 for (
int c = 0; c < m_nNumClasses; c++)
642 if (c == m_nBackgroundLabelId)
645 string strLabel = m_rgLabelToName[c];
646 rgOutFiles[strLabel].Flush();
647 rgOutFiles[strLabel].Close();
648 rgOutFiles[strLabel].Dispose();
653 string strFile = getFileName(
"",
"json");
654 using (StreamWriter sw =
new StreamWriter(strFile))
657 output.
AddChild(
"detections", m_detections);
658 string strOut = output.
ToJson();
664 string strFile = getFileName(
"",
"txt");
665 using (StreamWriter sw =
new StreamWriter(strFile))
669 int nLabel = (int)pt.
Get(
"category_id").
Numeric;
670 string strImageName = pt.
Get(
"image_id").
Value;
671 float fScore = (float)pt.
Get(
"score").
Numeric;
673 List<int> bbox =
new List<int>();
679 string strLine = strImageName;
680 strLine +=
" " + fScore.ToString();
681 strLine +=
" " + bbox[0].ToString() +
" " + bbox[1].ToString();
682 strLine +=
" " + (bbox[0] + bbox[2]).ToString();
683 strLine +=
" " + (bbox[1] + bbox[3]).ToString();
684 sw.WriteLine(strLine);
690 m_detections.
Clear();
696#warning DetectionOutputLayer - does not visualize detections yet.
701 colTop[0].mutable_cpu_data =
convert(rgfTopData);
713 throw new NotImplementedException();
The LabelBBox manages a bounding box used in SSD.
bool Contains(int nLabel)
Returns whether or not the label is contained in the label bounding box set.
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
void CHECK_LT(double df1, double df2, string str)
Test whether one number is less than another.
The NormalizedBBox manages a bounding box used in SSD.
float ymax
Get/set the y maximum.
float xmax
Get/set the x maximum.
float xmin
Get/set the x minimum.
float ymin
Get/set the y minimum.
The RawProto class is used to parse and output Google prototxt file data.
static RawProto FromFile(string strFileName)
Parses a prototxt from a file and returns it as a RawProto.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
The BBox class processes the NormalizedBBox data used with SSD.
List< NormalizedBBox > GetPrior(float[] rgPriorData, int nNumPriors, out List< List< float > > rgPriorVariances)
Get the prior boundary boxes from the rgPriorData.
List< LabelBBox > GetLocPredictions(float[] rgLocData, int nNum, int nNumPredsPerClass, int nNumLocClasses, bool bShareLocation)
Create a set of local predictions from the rgLocData.
void Dispose()
Clean up all resources.
List< Dictionary< int, List< float > > > GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
Calculate the confidence predictions from rgConfData.
List< LabelBBox > DecodeAll(List< LabelBBox > rgAllLocPreds, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgfPrioVariances, int nNum, bool bShareLocation, int nNumLocClasses, int nBackgroundLabelId, PriorBoxParameter.CodeType codeType, bool bVarianceEncodedInTarget, bool bClip)
Decode all bboxes in a batch.
void ApplyNMSFast(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fScoreThreshold, float fNmsThreshold, float fEta, int nTopK, out List< int > rgIndices)
Do a fast non-maximum suppression given bboxes and scores.
NormalizedBBox Output(NormalizedBBox bbox, SizeF szImg, ResizeParameter p)
Output the predicted bbox on the actual image.
The BlobCollection contains a list of Blobs.
void Add(Blob< T > b)
Add a new Blob to the collection.
void SetData(double df)
Set all blob data to the value specified.
int Count
Returns the number of items in the collection.
void Reshape(int[] rgShape)
Reshapes all blobs in the collection to the given shape.
The Blob is the main holder of data that moves through the Layers of the Net.
void ReshapeLike(Blob< T > b, bool? bUseHalfSize=null)
Reshape this Blob to have the same shape as another Blob.
string Name
Get/set the name of the Blob.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
The Property class stores both a numeric and text value.
double? Numeric
Returns the numeric value.
string Value
Returns the text value.
The PropertyTree class implements a simple property tree similar to the ptree in Boost.
void Clear()
Clear all nodes and values from the tree.
List< PropertyTree > Children
Returns a list of all child property trees within the tree.
void Put(string str, string strVal)
Add a new property string value.
void AddChild(string str, PropertyTree pt)
Add a new child to the Property tree.
string ToJson()
Converts the property tree to a Json representation.
Property Get(string strName)
Retrieves a property at the current level of the tree.
List< Property > GetChildren(string strName)
Retrieves all properties with the given key at the current level of the tree.
An interface for the units of computation which can be composed into a Net.
Log m_log
Specifies the Log for output.
LayerParameter m_param
Specifies the LayerParameter describing the Layer.
void convert(BlobCollection< T > col)
Convert a collection of blobs from / to half size.
float convertF(T df)
Converts a generic to a float value.
Phase m_phase
Specifies the Phase under which the Layer is run.
CudaDnn< T > m_cuda
Specifies the CudaDnn connection to Cuda.
LayerParameter.LayerType m_type
Specifies the Layer type.
The DetectionOutputLayer generates the detection output based on location and confidence predictions ...
override int MaxBottomBlobs
Returns the maximum number of bottom (input) Blobs: loc pred, conf pred, prior bbox
override void LayerSetUp(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Setup the layer.
override void backward(BlobCollection< T > colTop, List< bool > rgbPropagateDown, BlobCollection< T > colBottom)
Does not implement.
override void forward(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Do non-maximum suppression (nms) on prediction results.
override void setup_internal_blobs(BlobCollection< T > col)
Derivative layers should add all internal blobs to the 'col' provided.
override int MinBottomBlobs
Returns the minimum number of bottom (input) Blobs: loc pred, conf pred, prior bbox
override void Reshape(BlobCollection< T > colBottom, BlobCollection< T > colTop)
Reshape the bottom (input) and top (output) blobs.
override int ExactNumTopBlobs
Returns the exact number of required top (output) Blobs: det
DetectionOutputLayer(CudaDnn< T > cuda, Log log, LayerParameter p)
The DetectionOutputLayer constructor.
override void dispose()
Releases all GPU and host resources used by the Layer.
Specifies the base parameter for all layers.
string name
Specifies the name of this LayerParameter.
DetectionOutputParameter detection_output_param
Returns the parameter set when initialized with LayerType.DETECTION_OUTPUT
TransformationParameter transform_param
Returns the parameter set when initialized with LayerType.TRANSFORM
LayerType
Specifies the layer type.
bool Active
When active, the parameter is used, otherwise it is ignored.
PriorBoxParameter.CodeType code_type
Specifies the coding method for the bbox.
bool visualize
Specifies whether or not to visualize the detection results.
float? confidence_threshold
Specifies the threshold for deciding which detections to consider - only those which are larger than ...
float? visualize_threshold
Specifies the threshold used to visualize detection results.
NonMaximumSuppressionParameter nms_param
Specifies the parameter used for non maximum suppression.
bool variance_encoded_in_target
Specifies whether or not the variance is encoded in the target; otherwise we need to adjust the predi...
string save_file
When provided, specifies the outputs to the video file.
bool share_location
Specifies whether or not the bounding box is shared among different classes (default = true).
uint num_classes
Specifies the number of classes that are actually predicted - required!
SaveOutputParameter save_output_param
Specifies the parameter used for saving the detection results.
int background_label_id
Specifies the background class.
int keep_top_k
Specifies the number of total bboxes to be kept per image after nms step, -1 means keeping all bboxes...
Specifies the LabelMap used with SSD.
Dictionary< int, string > MapToName(Log log, bool bStrict, bool bDisplayName)
Map the labels into a dictionary.
static LabelMap FromProto(RawProto rp)
Parses the parameter from a RawProto.
float eta
Get/set the parameter for adaptive nms.
int? top_k
Get/set the maximum number of results kept.
float nms_threshold
Get/set the threshold to be used in nms.
Specifies the parameters for the PriorBoxParameter.
CodeType
Defines the encode/decode type.
Specifies the parameters for the ResizeParameter for use with SSD.
Specifies the parameters for the SaveOutputLayer.
OUTPUT_FORMAT output_format
Specifies the output format.
string output_directory
Specifies the output directory - if not empty, the results will be saved.
string name_size_file
Optionally, specifies the output name size file.
ResizeParameter resize_param
Specifies the resize parameter used in saving the data.
string label_map_file
Optionally, specifies the output label map file.
OUTPUT_FORMAT
Defines the output format.
string output_name_prefix
Specifies the output name prefix.
uint? num_test_image
Specifies the number of test images.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
The MyCaffe.common namespace contains common MyCaffe classes.
BLOB_TYPE
Defines the type of data held by a given Blob.
The MyCaffe.data namespace contains dataset creators used to create common testing datasets such as M...
The MyCaffe.fillers namespace contains all fillers including the Filler class.
The MyCaffe.layers.ssd namespace contains all Single-Shot MultiBox (SSD) related layers.
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...