5using System.Collections.Generic;
10using System.Threading.Tasks;
37 m_blobDiff =
new Blob<T>(cuda, log);
45 if (m_blobDiff !=
null)
69 public float ComputeAP(List<Tuple<float, int>> rgTp,
int nNumPos, List<Tuple<float, int>> rgFp,
ApVersion apVersion, out List<float> rgPrec, out List<float> rgRec)
72 int nNum = rgTp.Count;
75 for (
int i = 0; i < nNum; i++)
77 m_log.
CHECK_LE(Math.Abs(rgTp[i].Item1 - rgFp[i].Item1), fEps,
"The Tp[i] - Fp[i] is less than the threshold " + fEps.ToString());
78 m_log.
CHECK_EQ(rgTp[i].Item2, 1 - rgFp[i].Item2,
"The Tp[i].second should be one less than Fp[i].second!");
81 rgPrec =
new List<float>();
82 rgRec =
new List<float>();
85 if (rgTp.Count == 0 || nNumPos == 0)
89 List<int> rgTpCumSum =
CumSum(rgTp);
90 m_log.
CHECK_EQ(rgTpCumSum.Count, nNum,
"The tp cumulative sum should equal the number of rgTp items (" + nNum.ToString() +
")");
93 List<int> rgFpCumSum =
CumSum(rgFp);
94 m_log.
CHECK_EQ(rgFpCumSum.Count, nNum,
"The fp cumulative sum should equal the number of rgFp items (" + nNum.ToString() +
")");
97 for (
int i = 0; i < nNum; i++)
99 rgPrec.Add((
float)rgTpCumSum[i] / (
float)(rgTpCumSum[i] + rgFpCumSum[i]));
103 for (
int i = 0; i < nNum; i++)
105 m_log.
CHECK_LE(rgTpCumSum[i], nNumPos,
"The Tp cumulative sum must be less than the num pos of " + nNumPos.ToString());
106 rgRec.Add((
float)rgTpCumSum[i] / nNumPos);
115 int nStartIdx = nNum - 1;
117 for (
int j = 10; j >= 0; j--)
119 for (
int i = nStartIdx; i >= 0; i--)
121 if (rgRec[i] < j / 10.0f)
125 rgMaxPrec[j - 1] = rgMaxPrec[j];
130 if (rgMaxPrec[j] < rgPrec[i])
131 rgMaxPrec[j] = rgPrec[i];
135 for (
int j = 10; j >= 0; j--)
137 fAp += rgMaxPrec[j] / 11.0f;
145 float fCurRec = rgRec.Last();
146 float fCurPrec = rgPrec.Last();
148 for (
int i = nNum - 2; i >= 0; i--)
150 fCurPrec = Math.Max(rgPrec[i], fCurPrec);
151 float fAbsRec = Math.Abs(fCurRec - rgRec[i]);
153 fAp += fCurPrec * fAbsRec;
156 fAp += fCurRec * fCurPrec;
163 float fPrevRec = 0.0f;
164 for (
int i = 0; i < nNum; i++)
166 float fAbsRec = Math.Abs(rgRec[i] - fPrevRec);
168 fAp += rgPrec[i] * fAbsRec;
175 m_log.
FAIL(
"Unknown ap version '" + apVersion.ToString() +
"'!");
/// <summary>
/// Orders the pairs by descending score (Item1) and returns the running total of their counts (Item2).
/// </summary>
/// <param name="rgPairs">Specifies the (score, count) pairs to accumulate.</param>
/// <returns>A list with one entry per input pair; entry i holds the sum of Item2 over the i+1 highest scoring pairs.</returns>
public List<int> CumSum(List<Tuple<float, int>> rgPairs)
{
    List<int> rgTotals = new List<int>();
    int nRunning = 0;

    // OrderByDescending is a stable sort, so pairs with equal scores keep their input order.
    foreach (Tuple<float, int> pair in rgPairs.OrderByDescending(p => p.Item1))
    {
        nRunning += pair.Item2;
        rgTotals.Add(nRunning);
    }

    return rgTotals;
}
/// <summary>
/// Pairs each score with the index stored at the same position of rgIdx, sorts the pairs by descending
/// score and keeps at most nTopK of them.
/// </summary>
/// <param name="rgScores">Specifies the scores.</param>
/// <param name="rgIdx">Specifies the index associated with each score (read at the same position as the score).</param>
/// <param name="nTopK">Specifies the maximum number of pairs to keep; a negative value keeps all pairs.</param>
/// <returns>The (score, index) pairs ordered from highest to lowest score.</returns>
List<Tuple<float, int>> GetTopKScoreIndex(List<float> rgScores, List<int> rgIdx, int nTopK)
{
    List<Tuple<float, int>> rgRanked = rgScores
        .Select((fScore, nPos) => new Tuple<float, int>(fScore, rgIdx[nPos]))
        .OrderByDescending(item => item.Item1)
        .ToList();

    // Only truncate when a limit was requested and it is actually smaller than the list.
    bool bTruncate = (nTopK >= 0 && rgRanked.Count > nTopK);

    return bTruncate ? rgRanked.GetRange(0, nTopK) : rgRanked;
}
/// <summary>
/// Collects the scores that are strictly greater than fThreshold together with their positions in
/// rgScores, sorts them by descending score and keeps at most nTopK of them.
/// </summary>
/// <param name="rgScores">Specifies the scores.</param>
/// <param name="fThreshold">Specifies the value a score must exceed to be kept.</param>
/// <param name="nTopK">Specifies the maximum number of pairs to keep; a negative value keeps all pairs.</param>
/// <returns>The (score, position) pairs ordered from highest to lowest score.</returns>
List<Tuple<float, int>> GetMaxScoreIndex(List<float> rgScores, float fThreshold, int nTopK)
{
    List<Tuple<float, int>> rgCandidates = new List<Tuple<float, int>>();
    int nPos = 0;

    foreach (float fScore in rgScores)
    {
        if (fScore > fThreshold)
            rgCandidates.Add(new Tuple<float, int>(fScore, nPos));

        nPos++;
    }

    rgCandidates = rgCandidates.OrderByDescending(item => item.Item1).ToList();

    // No limit requested, or the limit does not cut anything off.
    if (nTopK < 0 || nTopK >= rgCandidates.Count)
        return rgCandidates;

    return rgCandidates.GetRange(0, nTopK);
}
263 public void ApplyNMSFast(List<NormalizedBBox> rgBBoxes, List<float> rgScores,
float fScoreThreshold,
float fNmsThreshold,
float fEta,
int nTopK, out List<int> rgIndices)
265 rgIndices =
new List<int>();
268 m_log.
CHECK_EQ(rgBBoxes.Count, rgScores.Count,
"The number of BBoxes and scores must be the same.");
270 List<Tuple<float, int>> rgScoresIndex = GetMaxScoreIndex(rgScores, fScoreThreshold, nTopK);
273 float fAdaptiveThreshold = fNmsThreshold;
275 while (rgScoresIndex.Count > 0)
277 int nIdx = rgScoresIndex[0].Item2;
280 for (
int k = 0; k < rgIndices.Count; k++)
285 int nKeptIdx = rgIndices[k];
286 float fOverlap =
JaccardOverlap(rgBBoxes[nIdx], rgBBoxes[nKeptIdx]);
288 if (fOverlap <= fAdaptiveThreshold)
297 rgScoresIndex.RemoveAt(0);
299 if (bKeep && fEta < 1 && fAdaptiveThreshold > 0.5f)
300 fAdaptiveThreshold *= fEta;
/// <summary>
/// Convenience overload that forwards to the ApplyNMS overload taking bReuseOverlaps and an
/// overlaps output, passing bReuseOverlaps = false and discarding the returned overlaps.
/// </summary>
/// <param name="rgBBoxes">Specifies the bounding boxes.</param>
/// <param name="rgScores">Specifies the score of each bounding box.</param>
/// <param name="fThreshold">Specifies the threshold passed through to the full overload.</param>
/// <param name="nTopK">Specifies the top-k limit passed through to the full overload.</param>
/// <returns>The list of indices returned by the full overload.</returns>
public List<int> ApplyNMS(List<NormalizedBBox> rgBBoxes, List<float> rgScores, float fThreshold, int nTopK)
{
    // The overlap table produced by the full overload is of no interest to callers of this overload.
    Dictionary<int, Dictionary<int, float>> rgUnusedOverlaps;
    List<int> rgKept = ApplyNMS(rgBBoxes, rgScores, fThreshold, nTopK, false, out rgUnusedOverlaps);

    return rgKept;
}
328 public List<int>
ApplyNMS(List<NormalizedBBox> rgBBoxes, List<float> rgScores,
float fThreshold,
int nTopK,
bool bReuseOverlaps, out Dictionary<
int, Dictionary<int, float>> rgOverlaps)
330 List<int> rgIndices =
new List<int>();
331 rgOverlaps =
new Dictionary<int, Dictionary<int, float>>();
334 m_log.
CHECK_EQ(rgBBoxes.Count, rgScores.Count,
"The number of BBoxes and scores must be the same.");
337 List<int> rgIdx =
new List<int>();
338 for (
int i = 0; i < rgScores.Count; i++)
343 List<Tuple<float, int>> rgScoresIndex = GetTopKScoreIndex(rgScores, rgIdx, nTopK);
346 while (rgScoresIndex.Count > 0)
349 int nBestIdx = rgScoresIndex[0].Item2;
351 float fSize =
Size(best_bbox);
356 rgScoresIndex.RemoveAt(0);
360 rgIndices.Add(nBestIdx);
363 rgScoresIndex.RemoveAt(0);
366 if (nTopK > -1 && rgIndices.Count >= nTopK)
372 while (nIdx < rgScoresIndex.Count)
374 Tuple<float, int> item = rgScoresIndex[nIdx];
375 int nCurIdx = item.Item2;
377 fSize =
Size(cur_bbox);
381 rgScoresIndex.RemoveAt(nIdx);
385 float fCurOverlap = 0.0f;
389 if (rgOverlaps.ContainsKey(nBestIdx) &&
390 rgOverlaps[nBestIdx].ContainsKey(nCurIdx))
392 fCurOverlap = rgOverlaps[nBestIdx][nCurIdx];
393 else if (rgOverlaps.ContainsKey(nCurIdx) &&
394 rgOverlaps[nCurIdx].ContainsKey(nBestIdx))
396 fCurOverlap = rgOverlaps[nCurIdx][nBestIdx];
402 if (!rgOverlaps.ContainsKey(nBestIdx))
403 rgOverlaps.Add(nBestIdx,
new Dictionary<int, float>());
405 if (!rgOverlaps[nBestIdx].ContainsKey(nCurIdx))
406 rgOverlaps[nBestIdx].Add(nCurIdx, fCurOverlap);
408 rgOverlaps[nBestIdx][nCurIdx] = fCurOverlap;
417 if (fCurOverlap > fThreshold)
418 rgScoresIndex.RemoveAt(nIdx);
435 public Dictionary<int, Dictionary<int, List<NormalizedBBox>>>
GetDetectionResults(
float[] rgData,
int nNumDet,
int nBackgroundLabelId)
437 Dictionary<int, Dictionary<int, List<NormalizedBBox>>> rgAllDetections =
new Dictionary<int, Dictionary<int, List<NormalizedBBox>>>();
439 for (
int i = 0; i < nNumDet; i++)
441 int nStartIdx = i * 7;
442 int nItemId = (int)rgData[nStartIdx];
446 int nLabel = (int)rgData[nStartIdx + 1];
447 m_log.
CHECK_NE(nBackgroundLabelId, nLabel,
"Found background label in the detection results.");
450 rgData[nStartIdx + 4],
451 rgData[nStartIdx + 5],
452 rgData[nStartIdx + 6],
455 rgData[nStartIdx + 2]);
458 if (!rgAllDetections.ContainsKey(nItemId))
459 rgAllDetections.Add(nItemId,
new Dictionary<
int, List<NormalizedBBox>>());
461 if (!rgAllDetections[nItemId].ContainsKey(nLabel))
462 rgAllDetections[nItemId].Add(nLabel,
new List<NormalizedBBox>());
464 rgAllDetections[nItemId][nLabel].Add(bbox);
467 return rgAllDetections;
477 public List<NormalizedBBox>
GetPrior(
float[] rgPriorData,
int nNumPriors, out List<List<float>> rgPriorVariances)
479 List<NormalizedBBox> rgPriorBboxes =
new List<NormalizedBBox>();
480 rgPriorVariances =
new List<List<float>>();
482 for (
int i = 0; i < nNumPriors; i++)
484 int nStartIdx = i * 4;
486 rgPriorData[nStartIdx + 1],
487 rgPriorData[nStartIdx + 2],
488 rgPriorData[nStartIdx + 3]);
490 rgPriorBboxes.Add(bbox);
493 for (
int i = 0; i < nNumPriors; i++)
495 int nStartIdx = (nNumPriors + i) * 4;
496 List<float> rgVariance =
new List<float>();
498 for (
int j = 0; j < 4; j++)
500 rgVariance.Add(rgPriorData[nStartIdx + j]);
503 rgPriorVariances.Add(rgVariance);
506 return rgPriorBboxes;
509 private int getLabel(
int nPredIdx,
int nNumPredsPerClass,
int nNumClasses,
int nBackgroundLabel, DictionaryMap<List<int>> rgMatchIndices, List<NormalizedBBox> rgGtBoxes)
511 int nLabel = nBackgroundLabel;
513 if (rgMatchIndices !=
null && rgMatchIndices.Count > 0 && rgGtBoxes !=
null && rgGtBoxes.Count > 0)
515 List<KeyValuePair<int, List<int>>> rgMatches = rgMatchIndices.Map.ToList();
517 foreach (KeyValuePair<
int, List<int>> match
in rgMatches)
519 List<int> rgMatchIdx = match.Value;
520 m_log.
CHECK_EQ(rgMatchIdx.Count, nNumPredsPerClass,
"The match count should equal the number of predictions per class.");
522 if (rgMatchIdx[nPredIdx] > -1)
524 int nIdx = rgMatchIdx[nPredIdx];
525 m_log.
CHECK_LT(nIdx, rgGtBoxes.Count,
"The match index should be less than the number of ground truth boxes.");
526 nLabel = rgGtBoxes[nIdx].label;
528 m_log.
CHECK_GE(nLabel, 0,
"The label must be >= 0.");
529 m_log.
CHECK_NE(nLabel, nBackgroundLabel,
"The label cannot equal the background label.");
530 m_log.
CHECK_LT(nLabel, nNumClasses,
"The label must be less than the number of classes.");
553 List<List<float>> rgrgAllConfLoss =
new List<List<float>>();
556 for (
int i = 0; i < nNum; i++)
558 List<float> rgConfLoss =
new List<float>();
560 for (
int p = 0; p < nNumPredsPerClass; p++)
562 int nStartIdx = p * nNumClasses;
564 int nLabel = nBackgroundLabelId;
571 m_log.
CHECK_GE(nLabel, 0,
"The label must be >= 0 for the SOFTMAX loss type.");
572 m_log.
CHECK_LT(nLabel, nNumClasses,
"The label must be < NumClasses for the SOFTMAX loss type.");
575 float fMaxVal = -
float.MaxValue;
576 for (
int c = 0; c < nNumClasses; c++)
578 float fVal = rgConfData[nOffset + nStartIdx + c];
579 fMaxVal = Math.Max(fMaxVal, fVal);
583 for (
int c = 0; c < nNumClasses; c++)
585 float fVal = rgConfData[nOffset + nStartIdx + c];
586 fSum += (float)Math.Exp(fVal - fMaxVal);
589 float fValAtLabel = rgConfData[nOffset + nStartIdx + nLabel];
590 float fProb = (float)Math.Exp(fValAtLabel - fMaxVal) / fSum;
591 fLoss = (float)-Math.Log(Math.Max(fProb,
float.MinValue));
598 for (
int c = 0; c < nNumClasses; c++)
600 nTarget = (c == nLabel) ? 1 : 0;
601 float fInput = rgConfData[nOffset + nStartIdx + c];
602 fLoss -= fInput * (nTarget - ((fInput >= 0) ? 1.0f : 0.0f)) - (
float)Math.Log(1 + Math.Exp(fInput - 2 * fInput * ((fInput >= 0) ? 1.0f : 0.0f)));
608 m_log.
FAIL(
"Unknown loss type '" + loss_type.ToString() +
"'!");
612 rgConfLoss.Add(fLoss);
615 rgrgAllConfLoss.Add(rgConfLoss);
616 nOffset += nNumPredsPerClass * nNumClasses;
619 return rgrgAllConfLoss;
/// <summary>
/// Overload accepting DictionaryMap based inputs: unwraps the Map of each match index entry and of
/// the ground truth boxes, then forwards to the Dictionary based ComputeConfLoss overload.
/// </summary>
/// <param name="rgConfData">Specifies the confidence data.</param>
/// <param name="nNum">Specifies the number of items.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label.</param>
/// <param name="loss_type">Specifies the confidence loss type.</param>
/// <param name="rgAllMatchIndices">Specifies the match indices, one DictionaryMap per item.</param>
/// <param name="rgAllGtBoxes">Specifies the ground truth boxes.</param>
/// <returns>The confidence losses returned by the Dictionary based overload.</returns>
public List<List<float>> ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List<DictionaryMap<List<int>>> rgAllMatchIndices, DictionaryMap<List<NormalizedBBox>> rgAllGtBoxes)
{
    List<Dictionary<int, List<int>>> rgUnwrapped = new List<Dictionary<int, List<int>>>(rgAllMatchIndices.Count);

    for (int nIdx = 0; nIdx < rgAllMatchIndices.Count; nIdx++)
    {
        rgUnwrapped.Add(rgAllMatchIndices[nIdx].Map);
    }

    Dictionary<int, List<NormalizedBBox>> rgGtMap = rgAllGtBoxes.Map;

    return ComputeConfLoss(rgConfData, nNum, nNumPredsPerClass, nNumClasses, nBackgroundLabelId, loss_type, rgUnwrapped, rgGtMap);
}
/// <summary>
/// Computes one confidence loss value per prior for each of the nNum items in rgConfData.
/// </summary>
/// <remarks>
/// For every prior the label defaults to the background label; when a match index greater than -1 is
/// found for the prior, the label of the matched ground truth box is used instead.  The loss is then
/// computed from the nNumClasses confidence values of that prior according to loss_type.
/// </remarks>
/// <param name="rgConfData">Specifies the confidence data, read as nNum consecutive runs of nNumPredsPerClass * nNumClasses values.</param>
/// <param name="nNum">Specifies the number of items.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions (priors) per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <param name="nBackgroundLabelId">Specifies the background label, which must be less than nNumClasses.</param>
/// <param name="loss_type">Specifies the confidence loss type.</param>
/// <param name="rgAllMatchIndices">Specifies the match indices of each item, keyed by label, each holding one entry per prior.</param>
/// <param name="rgAllGtBoxes">Specifies the ground truth boxes keyed by item index.</param>
/// <returns>One list per item holding the loss of each prior.</returns>
public List<List<float>> ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List<Dictionary<int, List<int>>> rgAllMatchIndices, Dictionary<int, List<NormalizedBBox>> rgAllGtBoxes)
{
    // Smallest positive normal float; float.MinValue is the most NEGATIVE float and
    // therefore cannot be used to keep a probability away from zero before taking the log.
    const float fMinProb = 1.17549435E-38f;

    m_log.CHECK_LT(nBackgroundLabelId, nNumClasses, "The background id must be less than the number of classes!");
    List<List<float>> rgrgAllConfLoss = new List<List<float>>();
    int nOffset = 0;

    for (int i = 0; i < nNum; i++)
    {
        List<float> rgConfLoss = new List<float>();
        Dictionary<int, List<int>> rgMatchIndices = rgAllMatchIndices[i];

        for (int p = 0; p < nNumPredsPerClass; p++)
        {
            int nStartIdx = p * nNumClasses;

            // Unmatched priors are scored against the background label.
            int nLabel = nBackgroundLabelId;

            foreach (KeyValuePair<int, List<int>> kv in rgMatchIndices)
            {
                List<int> rgMatchIndex = kv.Value;
                m_log.CHECK_EQ(rgMatchIndex.Count, nNumPredsPerClass, "The number of match indexes must be equal to the NumPredsPerClass!");

                if (rgMatchIndex[p] > -1)
                {
                    m_log.CHECK(rgAllGtBoxes.ContainsKey(i), "The AllGtBoxes does not have the label '" + i.ToString() + "'!");
                    List<NormalizedBBox> rgGtBboxes = rgAllGtBoxes[i];

                    m_log.CHECK_LT(rgMatchIndex[p], rgGtBboxes.Count, "The match index at '" + p.ToString() + "' must be less than the number of Gt bboxes at label " + i.ToString() + " (" + rgGtBboxes.Count.ToString() + ")!");

                    nLabel = rgGtBboxes[rgMatchIndex[p]].label;
                    m_log.CHECK_GE(nLabel, 0, "The label must be >= 0.");
                    m_log.CHECK_NE(nLabel, nBackgroundLabelId, "The label cannot be the background label of '" + nBackgroundLabelId.ToString() + "'!");
                    m_log.CHECK_LT(nLabel, nNumClasses, "The label must be < NumClasses (" + nNumClasses.ToString() + ")!");

                    // A prior can only be matched to one ground truth box.
                    break;
                }
            }

            float fLoss = 0;

            if (loss_type == MultiBoxLossParameter.ConfLossType.SOFTMAX)
            {
                m_log.CHECK_GE(nLabel, 0, "The label must be >= 0 for the SOFTMAX loss type.");
                m_log.CHECK_LT(nLabel, nNumClasses, "The label must be < NumClasses for the SOFTMAX loss type.");

                // Subtract the maximum before exponentiating to avoid overflow.  The maximum must be
                // taken over the current item's values, so the seed value is read at nOffset as well.
                float fMaxVal = rgConfData[nOffset + nStartIdx];
                for (int c = 1; c < nNumClasses; c++)
                {
                    float fVal = rgConfData[nOffset + nStartIdx + c];
                    fMaxVal = Math.Max(fMaxVal, fVal);
                }

                float fSum = 0;
                for (int c = 0; c < nNumClasses; c++)
                {
                    float fVal = rgConfData[nOffset + nStartIdx + c];
                    fSum += (float)Math.Exp(fVal - fMaxVal);
                }

                float fValAtLabel = rgConfData[nOffset + nStartIdx + nLabel];
                float fProb = (float)Math.Exp(fValAtLabel - fMaxVal) / fSum;

                // Clamp to a tiny positive value so that a zero probability yields a large finite loss.
                fLoss = (float)-Math.Log(Math.Max(fProb, fMinProb));
            }
            else if (loss_type == MultiBoxLossParameter.ConfLossType.LOGISTIC)
            {
                int nTarget = 0;

                for (int c = 0; c < nNumClasses; c++)
                {
                    nTarget = (c == nLabel) ? 1 : 0;
                    float fInput = rgConfData[nOffset + nStartIdx + c];
                    fLoss -= fInput * (nTarget - ((fInput >= 0) ? 1.0f : 0.0f)) - (float)Math.Log(1 + Math.Exp(fInput - 2 * fInput * ((fInput >= 0) ? 1.0f : 0.0f)));
                }
            }
            else
            {
                m_log.FAIL("Unknown loss type '" + loss_type.ToString() + "'!");
            }

            rgConfLoss.Add(fLoss);
        }

        rgrgAllConfLoss.Add(rgConfLoss);
        nOffset += nNumPredsPerClass * nNumClasses;
    }

    return rgrgAllConfLoss;
}
/// <summary>
/// Regroups the flat confidence data into per-item, per-class score lists.
/// </summary>
/// <param name="rgConfData">Specifies the confidence data, read as nNum consecutive runs of nNumPredsPerClass * nNumClasses values with the class varying fastest.</param>
/// <param name="nNum">Specifies the number of items.</param>
/// <param name="nNumPredsPerClass">Specifies the number of predictions per class.</param>
/// <param name="nNumClasses">Specifies the number of classes.</param>
/// <returns>One dictionary per item, mapping each class index to its nNumPredsPerClass scores in prediction order.</returns>
public List<Dictionary<int, List<float>>> GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
{
    List<Dictionary<int, List<float>>> rgAllScores = new List<Dictionary<int, List<float>>>();
    int nItemStride = nNumPredsPerClass * nNumClasses;

    for (int nItem = 0; nItem < nNum; nItem++)
    {
        Dictionary<int, List<float>> rgScoresByClass = new Dictionary<int, List<float>>();
        int nItemBase = nItem * nItemStride;

        for (int nPred = 0; nPred < nNumPredsPerClass; nPred++)
        {
            int nPredBase = nItemBase + nPred * nNumClasses;

            for (int nClass = 0; nClass < nNumClasses; nClass++)
            {
                float fScore = rgConfData[nPredBase + nClass];

                List<float> rgClassScores;
                if (!rgScoresByClass.TryGetValue(nClass, out rgClassScores))
                {
                    rgClassScores = new List<float>();
                    rgScoresByClass.Add(nClass, rgClassScores);
                }

                rgClassScores.Add(fScore);
            }
        }

        rgAllScores.Add(rgScoresByClass);
    }

    return rgAllScores;
}
802 public List<LabelBBox>
GetLocPredictions(
float[] rgLocData,
int nNum,
int nNumPredsPerClass,
int nNumLocClasses,
bool bShareLocation)
804 List<LabelBBox> rgLocPreds =
new List<LabelBBox>();
807 m_log.
CHECK_EQ(nNumLocClasses, 1,
"When shareing locations, the nNumLocClasses must be 1.");
811 for (
int i = 0; i < nNum; i++)
815 for (
int p = 0; p < nNumPredsPerClass; p++)
817 int nStartIdx = p * nNumLocClasses * 4;
819 for (
int c = 0; c < nNumLocClasses; c++)
821 int nLabel = (bShareLocation) ? -1 : c;
822 labelBbox[nLabel].
Add(
new NormalizedBBox(rgLocData[nStartIdx + nOffset + c * 4 + 0],
823 rgLocData[nStartIdx + nOffset + c * 4 + 1],
824 rgLocData[nStartIdx + nOffset + c * 4 + 2],
825 rgLocData[nStartIdx + nOffset + c * 4 + 3]));
829 nOffset += nNumPredsPerClass * nNumLocClasses * 4;
830 rgLocPreds.Add(labelBbox);
844 public DictionaryMap<List<NormalizedBBox>>
GetGroundTruth(
float[] rgGtData,
int nNumGt,
int nBackgroundLabelId,
bool bUseDifficultGt)
846 DictionaryMap<List<NormalizedBBox>> rgAllGt =
new DictionaryMap<List<NormalizedBBox>>(
null);
848 for (
int i = 0; i < nNumGt; i++)
850 int nStartIdx = i * 8;
851 int nItemId = (int)rgGtData[nStartIdx];
855 int nLabel = (int)rgGtData[nStartIdx + 1];
856 m_log.
CHECK_NE(nBackgroundLabelId, nLabel,
"Found the background label in the dataset!");
858 bool bDifficult = (rgGtData[nStartIdx + 7] == 0) ?
false :
true;
860 if (!bUseDifficultGt && bDifficult)
864 rgGtData[nStartIdx + 4],
865 rgGtData[nStartIdx + 5],
866 rgGtData[nStartIdx + 6],
871 if (rgAllGt[nItemId] ==
null)
872 rgAllGt[nItemId] =
new List<NormalizedBBox>();
874 rgAllGt[nItemId].Add(bbox);
888 public Dictionary<int, LabelBBox>
GetGroundTruthEx(
float[] rgGtData,
int nNumGt,
int nBackgroundLabelId,
bool bUseDifficultGt)
890 Dictionary<int, LabelBBox> rgAllGtBboxes =
new Dictionary<int, LabelBBox>();
892 for (
int i = 0; i < nNumGt; i++)
894 int nStartIdx = i * 8;
895 int nItemId = (int)rgGtData[nStartIdx];
899 int nLabel = (int)rgGtData[nStartIdx + 1];
900 m_log.
CHECK_NE(nBackgroundLabelId, nLabel,
"Found the background label in the dataset!");
902 bool bDifficult = (rgGtData[nStartIdx + 7] == 0) ?
false :
true;
904 if (!bUseDifficultGt && bDifficult)
908 rgGtData[nStartIdx + 4],
909 rgGtData[nStartIdx + 5],
910 rgGtData[nStartIdx + 6],
915 if (!rgAllGtBboxes.ContainsKey(nItemId))
916 rgAllGtBboxes.Add(nItemId,
new LabelBBox());
918 rgAllGtBboxes[nItemId].Add(nLabel, bbox);
921 return rgAllGtBboxes;
935 public void Match(List<NormalizedBBox> rgGtBboxes, List<NormalizedBBox> rgPredBboxes,
int nLabel,
MultiBoxLossParameter.
MatchType match_type,
float fOverlapThreshold,
bool bIgnoreCrossBoundaryBbox, out List<int> rgMatchIndices, out List<float> rgMatchOverlaps)
937 int nNumPred = rgPredBboxes.Count;
942 List<int> rgGtIndices =
new List<int>();
947 nNumGt = rgGtBboxes.Count;
948 for (
int i = 0; i < nNumGt; i++)
957 for (
int i = 0; i < rgGtBboxes.Count; i++)
959 if (rgGtBboxes[i].label == nLabel)
971 Dictionary<int, Dictionary<int, float>> rgOverlaps =
new Dictionary<int, Dictionary<int, float>>();
972 for (
int i = 0; i < nNumPred; i++)
974 rgOverlaps.Add(i,
new Dictionary<int, float>());
978 rgMatchIndices.Add(-2);
982 for (
int j = 0; j < nNumGt; j++)
984 int nGtIdx = rgGtIndices[j];
985 float fOverlap =
JaccardOverlap(rgPredBboxes[i], rgGtBboxes[nGtIdx]);
986 if (fOverlap > 1e-6f)
988 rgMatchOverlaps[i] = Math.Max(rgMatchOverlaps[i], fOverlap);
989 rgOverlaps[i].Add(j, fOverlap);
995 List<int> rgGtPool =
new List<int>();
996 for (
int i = 0; i < nNumGt; i++)
1002 while (rgGtPool.Count > 0)
1006 float fMaxOverlap = -1;
1008 foreach (KeyValuePair<
int, Dictionary<int, float>> kv
in rgOverlaps)
1013 if (rgMatchIndices[i] != -1)
1016 for (
int p = 0; p < rgGtPool.Count; p++)
1018 int j = rgGtPool[p];
1021 if (!kv.Value.ContainsKey(j))
1025 if (kv.Value[j] > fMaxOverlap)
1031 fMaxOverlap = kv.Value[j];
1043 m_log.
CHECK_EQ(rgMatchIndices[nMaxIdx], -1,
"The match index at index=" + nMaxIdx.ToString() +
" should be -1.");
1044 rgMatchIndices[nMaxIdx] = rgGtIndices[nMaxGtIdx];
1045 rgMatchOverlaps[nMaxIdx] = fMaxOverlap;
1048 rgGtPool.Remove(nMaxGtIdx);
1061 foreach (KeyValuePair<
int, Dictionary<int, float>> kv
in rgOverlaps)
1066 if (rgMatchIndices[i] != -1)
1070 float fMaxOverlap = -1;
1072 for (
int j = 0; j < nNumGt; j++)
1075 if (!kv.Value.ContainsKey(j))
1079 float fOverlap = kv.Value[j];
1083 if (fOverlap >= fOverlapThreshold && fOverlap > fMaxOverlap)
1086 fMaxOverlap = fOverlap;
1091 if (nMaxGtIdx != -1)
1093 m_log.
CHECK_EQ(rgMatchIndices[i], -1,
"The match index at index=" + i.ToString() +
" should be -1.");
1094 rgMatchIndices[i] = rgGtIndices[nMaxGtIdx];
1095 rgMatchOverlaps[i] = fMaxOverlap;
1101 m_log.
FAIL(
"Unknown matching type '" + match_type.ToString() +
"'!");
1113 if (bbox.
xmin < 0 || bbox.
xmin > 1)
1116 if (bbox.
ymin < 0 || bbox.
ymin > 1)
1119 if (bbox.
xmax < 0 || bbox.
xmax > 1)
1122 if (bbox.
ymax < 0 || bbox.
ymax > 1)
1142 public List<LabelBBox>
DecodeAll(List<LabelBBox> rgAllLocPreds, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgfPrioVariances,
int nNum,
bool bShareLocation,
int nNumLocClasses,
int nBackgroundLabelId,
PriorBoxParameter.
CodeType codeType,
bool bVarianceEncodedInTarget,
bool bClip)
1144 List<LabelBBox> rgAllDecodedBboxes =
new List<LabelBBox>();
1146 m_log.
CHECK_EQ(rgAllLocPreds.Count, nNum,
"The number of Loc Preds does not equal the expected Num!");
1148 for (
int i = 0; i < nNum; i++)
1153 for (
int c = 0; c < nNumLocClasses; c++)
1155 int nLabel = (bShareLocation) ? -1 : c;
1158 if (nLabel == nBackgroundLabelId)
1162 if (!rgAllLocPreds[i].Contains(nLabel))
1163 m_log.
FAIL(
"Could not find the location predictions for label '" + nLabel.ToString() +
"'!");
1165 List<NormalizedBBox> rgLabelLocPreds = rgAllLocPreds[i][nLabel];
1166 decode_bboxes[nLabel] =
Decode(rgPriorBboxes, rgrgfPrioVariances, codeType, bVarianceEncodedInTarget, bClip, rgLabelLocPreds);
1169 rgAllDecodedBboxes.Add(decode_bboxes);
1172 return rgAllDecodedBboxes;
1185 public List<NormalizedBBox>
Decode(List<NormalizedBBox> rgPriorBbox, List<List<float>> rgrgfPriorVariance,
PriorBoxParameter.
CodeType code_type,
bool bEncodeVarianceInTarget,
bool bClip, List<NormalizedBBox> rgBbox)
1187 m_log.
CHECK_EQ(rgPriorBbox.Count, rgrgfPriorVariance.Count,
"The number of prior boxes must match the number of variance lists.");
1188 m_log.
CHECK_EQ(rgPriorBbox.Count, rgBbox.Count,
"The number of prior boxes must match the number of boxes.");
1189 int nNumBoxes = rgPriorBbox.Count;
1192 m_log.
CHECK_EQ(rgrgfPriorVariance[0].Count, 4,
"The variance lists must have 4 items.");
1194 List<NormalizedBBox> rgDecodeBoxes =
new List<NormalizedBBox>();
1196 for (
int i = 0; i < nNumBoxes; i++)
1198 NormalizedBBox decode_box =
Decode(rgPriorBbox[i], rgrgfPriorVariance[i], code_type, bEncodeVarianceInTarget, bClip, rgBbox[i]);
1199 rgDecodeBoxes.Add(decode_box);
1202 return rgDecodeBoxes;
1222 if (bEncodeVarianceInTarget)
1233 m_log.
CHECK_EQ(rgfPriorVariance.Count, 4,
"The variance must have 4 values!");
1234 foreach (
float fVar
in rgfPriorVariance)
1236 m_log.
CHECK_GT(fVar, 0,
"Each variance must be greater than 0.");
1240 prior_bbox.
ymin + rgfPriorVariance[1] * bbox.
ymin,
1241 prior_bbox.
xmax + rgfPriorVariance[2] * bbox.
xmax,
1242 prior_bbox.
ymax + rgfPriorVariance[3] * bbox.
ymax);
1248 float fPriorWidth = prior_bbox.
xmax - prior_bbox.
xmin;
1249 m_log.
CHECK_GT(fPriorWidth, 0,
"The prior width must be greater than zero.");
1250 float fPriorHeight = prior_bbox.
ymax - prior_bbox.
ymin;
1251 m_log.
CHECK_GT(fPriorHeight, 0,
"The prior height must be greater than zero.");
1252 float fPriorCenterX = (prior_bbox.
xmin + prior_bbox.
xmax) / 2;
1253 float fPriorCenterY = (prior_bbox.
ymin + prior_bbox.
ymax) / 2;
1255 float fDecodeBboxCenterX;
1256 float fDecodeBboxCenterY;
1257 float fDecodeBboxWidth;
1258 float fDecodeBboxHeight;
1260 if (bEncodeVarianceInTarget)
1263 fDecodeBboxCenterX = bbox.
xmin * fPriorWidth + fPriorCenterX;
1264 fDecodeBboxCenterY = bbox.
ymin * fPriorHeight + fPriorCenterY;
1265 fDecodeBboxWidth = (float)Math.Exp(bbox.
xmax) * fPriorWidth;
1266 fDecodeBboxHeight = (float)Math.Exp(bbox.
ymax) * fPriorHeight;
1271 fDecodeBboxCenterX = rgfPriorVariance[0] * bbox.
xmin * fPriorWidth + fPriorCenterX;
1272 fDecodeBboxCenterY = rgfPriorVariance[1] * bbox.
ymin * fPriorHeight + fPriorCenterY;
1273 fDecodeBboxWidth = (float)Math.Exp(rgfPriorVariance[2] * bbox.
xmax) * fPriorWidth;
1274 fDecodeBboxHeight = (float)Math.Exp(rgfPriorVariance[3] * bbox.
ymax) * fPriorHeight;
1277 decode_bbox =
new NormalizedBBox(fDecodeBboxCenterX - fDecodeBboxWidth / 2,
1278 fDecodeBboxCenterY - fDecodeBboxHeight / 2,
1279 fDecodeBboxCenterX + fDecodeBboxWidth / 2,
1280 fDecodeBboxCenterY + fDecodeBboxHeight / 2);
1286 float fPriorWidth = prior_bbox.
xmax - prior_bbox.
xmin;
1287 m_log.
CHECK_GT(fPriorWidth, 0,
"The prior width must be greater than zero.");
1288 float fPriorHeight = prior_bbox.
ymax - prior_bbox.
ymin;
1289 m_log.
CHECK_GT(fPriorHeight, 0,
"The prior height must be greater than zero.");
1291 if (bEncodeVarianceInTarget)
1295 prior_bbox.
ymin + bbox.
ymin * fPriorHeight,
1296 prior_bbox.
xmax + bbox.
xmax * fPriorWidth,
1297 prior_bbox.
ymax + bbox.
ymax * fPriorHeight);
1302 m_log.
CHECK_EQ(rgfPriorVariance.Count, 4,
"The variance must have 4 values!");
1303 foreach (
float fVar
in rgfPriorVariance)
1305 m_log.
CHECK_GT(fVar, 0,
"Each variance must be greater than 0.");
1309 prior_bbox.
ymin + rgfPriorVariance[1] * bbox.
ymin * fPriorHeight,
1310 prior_bbox.
xmax + rgfPriorVariance[2] * bbox.
xmax * fPriorWidth,
1311 prior_bbox.
ymax + rgfPriorVariance[3] * bbox.
ymax * fPriorHeight);
1317 m_log.
FAIL(
"Unknown code type '" + code_type.ToString());
1321 decode_bbox.
size =
Size(decode_bbox);
1323 decode_bbox =
Clip(decode_bbox);
1344 if (bEncodeVarianceInTarget)
1354 m_log.
CHECK_EQ(rgfPriorVariance.Count, 4,
"The variance must have 4 values!");
1355 foreach (
float fVar
in rgfPriorVariance)
1357 m_log.
CHECK_GT(fVar, 0,
"Each variance must be greater than 0.");
1361 (bbox.
ymin - prior_bbox.
ymin) / rgfPriorVariance[1],
1362 (bbox.
xmax - prior_bbox.
xmax) / rgfPriorVariance[2],
1363 (bbox.
ymax - prior_bbox.
ymax) / rgfPriorVariance[3]);
1369 float fPriorWidth = prior_bbox.
xmax - prior_bbox.
xmin;
1370 m_log.
CHECK_GT(fPriorWidth, 0,
"The prior width must be greater than zero.");
1371 float fPriorHeight = prior_bbox.
ymax - prior_bbox.
ymin;
1372 m_log.
CHECK_GT(fPriorHeight, 0,
"The prior height must be greater than zero.");
1373 float fPriorCenterX = (prior_bbox.
xmin + prior_bbox.
xmax) / 2;
1374 float fPriorCenterY = (prior_bbox.
ymin + prior_bbox.
ymax) / 2;
1376 float fBboxWidth = bbox.
xmax - bbox.
xmin;
1377 m_log.
CHECK_GT(fBboxWidth, 0,
"The bbox width must be greater than zero.");
1378 float fBboxHeight = bbox.
ymax - bbox.
ymin;
1379 m_log.
CHECK_GT(fBboxHeight, 0,
"The bbox height must be greater than zero.");
1380 float fBboxCenterX = (bbox.
xmin + bbox.
xmax) / 2;
1381 float fBboxCenterY = (bbox.
ymin + bbox.
ymax) / 2;
1383 if (bEncodeVarianceInTarget)
1385 encode_bbox =
new NormalizedBBox((fBboxCenterX - fPriorCenterX) / fPriorWidth,
1386 (fBboxCenterY - fPriorCenterY) / fPriorHeight,
1387 (
float)Math.Log(fBboxWidth / fPriorWidth),
1388 (
float)Math.Log(fBboxHeight / fPriorHeight));
1393 m_log.
CHECK_EQ(rgfPriorVariance.Count, 4,
"The variance must have 4 values!");
1394 foreach (
float fVar
in rgfPriorVariance)
1396 m_log.
CHECK_GT(fVar, 0,
"Each variance must be greater than 0.");
1399 encode_bbox =
new NormalizedBBox((fBboxCenterX - fPriorCenterX) / fPriorWidth / rgfPriorVariance[0],
1400 (fBboxCenterY - fPriorCenterY) / fPriorHeight / rgfPriorVariance[1],
1401 (
float)Math.Log(fBboxWidth / fPriorWidth) / rgfPriorVariance[2],
1402 (
float)Math.Log(fBboxHeight / fPriorHeight) / rgfPriorVariance[3]);
1409 float fPriorWidth = prior_bbox.
xmax - prior_bbox.
xmin;
1410 m_log.
CHECK_GT(fPriorWidth, 0,
"The prior width must be greater than zero.");
1411 float fPriorHeight = prior_bbox.
ymax - prior_bbox.
ymin;
1412 m_log.
CHECK_GT(fPriorHeight, 0,
"The prior height must be greater than zero.");
1413 float fPriorCenterX = (prior_bbox.
xmin + prior_bbox.
xmax) / 2;
1414 float fPriorCenterY = (prior_bbox.
ymin + prior_bbox.
ymax) / 2;
1416 if (bEncodeVarianceInTarget)
1419 (bbox.
ymin - prior_bbox.
ymin) / fPriorHeight,
1420 (bbox.
xmax - prior_bbox.
xmax) / fPriorWidth,
1421 (bbox.
ymax - prior_bbox.
ymax) / fPriorHeight);
1426 m_log.
CHECK_EQ(rgfPriorVariance.Count, 4,
"The variance must have 4 values!");
1427 foreach (
float fVar
in rgfPriorVariance)
1429 m_log.
CHECK_GT(fVar, 0,
"Each variance must be greater than 0.");
1433 (bbox.
ymin - prior_bbox.
ymin) / fPriorHeight / rgfPriorVariance[1],
1434 (bbox.
xmax - prior_bbox.
xmax) / fPriorWidth / rgfPriorVariance[2],
1435 (bbox.
ymax - prior_bbox.
ymax) / fPriorHeight / rgfPriorVariance[3]);
1441 m_log.
FAIL(
"Unknown code type '" + code_type.ToString());
1459 float fXCenter = (bbox.
xmin + bbox.
xmax) / 2;
1460 float fYCenter = (bbox.
ymin + bbox.
ymax) / 2;
1462 if ((fXCenter >= src_bbox.
xmin && fXCenter <= src_bbox.
xmax) &&
1463 (fYCenter >= src_bbox.
ymin && fYCenter <= src_bbox.
ymax))
1470 float fBboxCoverage =
Coverage(bbox, src_bbox);
1478 m_log.
FAIL(
"Unknown emit type!");
1492 float fIntersectSize =
Size(intersectBBox);
1494 if (fIntersectSize > 0)
1496 float fBbox1Size =
Size(bbox1);
1497 return fBbox1Size / fIntersectSize;
1511 float fSrcWidth = srcBbox.
xmax - srcBbox.
xmin;
1512 float fSrcHeight = srcBbox.
ymax - srcBbox.
ymin;
1515 srcBbox.
ymin + bbox.
ymin * fSrcHeight,
1516 srcBbox.
xmax + bbox.
xmax * fSrcWidth,
1530 float fIntersectWidth = intersect_bbox.
xmax - intersect_bbox.
xmin;
1531 float fIntersectHeight = intersect_bbox.
ymax - intersect_bbox.
ymin;
1535 fIntersectWidth += 1;
1536 fIntersectHeight += 1;
1539 if (fIntersectWidth > 0 && fIntersectHeight > 0)
1541 float fIntersectSize = fIntersectWidth * fIntersectHeight;
1542 float fBbox1Size =
Size(bbox1);
1543 float fBbox2Size =
Size(bbox2);
1544 return fIntersectSize / (fBbox1Size + fBbox2Size - fIntersectSize);
1559 int height = (int)szImg.Height;
1560 int width = (
int)szImg.Width;
1565 float fResizeHeight = p.
height;
1566 float fResizeWidth = p.
width;
1567 float fResizeAspect = fResizeWidth / fResizeHeight;
1570 float fAspect = (float)width / (
float)height;
1576 temp_bbox =
Clip(temp_bbox);
1577 return Scale(temp_bbox, height, width);
1585 if (fAspect > fResizeAspect)
1587 fPadding = (fResizeHeight - fResizeWidth / fAspect) / 2;
1588 fymin = fPadding / fResizeHeight;
1589 fymax = 1.0f - fPadding / fResizeHeight;
1593 fPadding = (fResizeWidth - fResizeHeight * fAspect) / 2;
1594 fxmin = fPadding / fResizeWidth;
1595 fxmax = 1.0f - fPadding / fResizeWidth;
1599 temp_bbox =
Clip(temp_bbox);
1600 return Scale(temp_bbox, height, width);
1603 if (nHeightScale == 0 || nWidthScale == 0)
1605 temp_bbox =
Clip(temp_bbox);
1606 return Scale(temp_bbox, height, width);
1610 temp_bbox =
Scale(temp_bbox, nHeightScale, nWidthScale);
1611 return Clip(temp_bbox, height, width);
1615 m_log.
FAIL(
"Unknown resize mode '" + p.
resize_mode.ToString() +
"'!");
1622 temp_bbox =
Clip(temp_bbox);
1624 return Scale(temp_bbox, height, width);
1637 proj_bbox = bbox.
Clone();
1643 float src_width = src.
xmax - src.
xmin;
1644 float src_height = src.
ymax - src.
ymin;
1646 (bbox.
ymin - src.
ymin) / src_height,
1647 (bbox.
xmax - src.
xmin) / src_width,
1648 (bbox.
ymax - src.
ymin) / src_height,
1650 proj_bbox =
Clip(proj_bbox);
1652 float fSize =
Size(proj_bbox);
1675 float fOrigAspect = (float)nWidth / (
float)nHeight;
1676 float fResizeHeight = param.
height;
1677 float fResizeWidth = param.
width;
1678 float fResizeAspect = fResizeWidth / fResizeHeight;
1680 if (fOrigAspect < fResizeAspect)
1681 fResizeHeight = fResizeWidth / fOrigAspect;
1683 fResizeWidth = fResizeHeight * fOrigAspect;
1685 float fCropHeight = fResizeHeight * (crop_bbox.
ymax - crop_bbox.
ymin);
1686 float fCropWidth = fResizeWidth * (crop_bbox.
xmax - crop_bbox.
xmin);
1687 m_log.
CHECK_GE(fCropWidth, fWidthScale,
"The crop width must be >= the width scale!");
1688 m_log.
CHECK_GE(fCropHeight, fHeightScale,
"The crop height must be >= the height scale!");
1690 bbox.
Set(bbox.
xmin * fCropWidth / fWidthScale,
1691 bbox.
xmax * fCropWidth / fWidthScale,
1692 bbox.
ymin * fCropHeight / fHeightScale,
1693 bbox.
ymax * fCropHeight / fHeightScale);
1726 clipped.
xmin = Math.Max(Math.Min(bbox.
xmin, fWidth), 0.0f);
1727 clipped.
ymin = Math.Max(Math.Min(bbox.
ymin, fHeight), 0.0f);
1728 clipped.
xmax = Math.Max(Math.Min(bbox.
xmax, fWidth), 0.0f);
1729 clipped.
ymax = Math.Max(Math.Min(bbox.
ymax, fHeight), 0.0f);
1748 bool bNormalized = !(nWidth > 1 || nHeight > 1);
1749 scaled.
size =
Size(scaled, bNormalized);
1767 float fWidth = bbox.
xmax - bbox.
xmin;
1768 float fHeight = bbox.
ymax - bbox.
ymin;
1771 return fWidth * fHeight;
1773 return (fWidth + 1) * (fHeight + 1);
1786 public void FindMatches(List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBboxes, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances,
MultiBoxLossParameter p, out List<DictionaryMap<List<float>>> rgAllMatchOverlaps, out List<DictionaryMap<List<int>>> rgAllMatchIndices)
1788 rgAllMatchOverlaps =
new List<DictionaryMap<List<float>>>();
1789 rgAllMatchIndices =
new List<DictionaryMap<List<int>>>();
1792 m_log.
CHECK_GE(nNumClasses, 1,
"The num_classes should not be less than 1.");
1795 int nLocClasses = (bShareLocation) ? 1 : nNumClasses;
1805 int nNum = rgAllLocPreds.Count;
1806 for (
int i = 0; i < nNum; i++)
1808 DictionaryMap<List<int>> rgMatchIndices =
new DictionaryMap<List<int>>(
null);
1809 DictionaryMap<List<float>> rgMatchOverlaps =
new DictionaryMap<List<float>>(
null);
1812 if (!rgAllGtBboxes.Map.ContainsKey(i))
1815 rgAllMatchIndices.Add(rgMatchIndices);
1816 rgAllMatchOverlaps.Add(rgMatchOverlaps);
1821 List<NormalizedBBox> rgGtBboxes = rgAllGtBboxes[i];
1822 if (!bUsePriorForMatching)
1824 for (
int c = 0; c < nLocClasses; c++)
1826 int nLabel = (bShareLocation) ? -1 : c;
1829 if (!bShareLocation && nLabel == nBackgroundLabelId)
1833 bool bClipBbox =
false;
1834 List<NormalizedBBox> rgLocBBoxes =
Decode(rgPriorBboxes, rgrgPriorVariances, codeType, bEncodeVarianceInTarget, bClipBbox, rgAllLocPreds[i][nLabel]);
1836 List<int> rgMatchIndices1;
1837 List<float> rgMatchOverlaps1;
1838 Match(rgGtBboxes, rgLocBBoxes, nLabel, matchType, fOverlapThreshold, bIgnoreCrossBoundaryBbox, out rgMatchIndices1, out rgMatchOverlaps1);
1840 rgMatchIndices[nLabel] = rgMatchIndices1;
1841 rgMatchOverlaps[nLabel] = rgMatchOverlaps1;
1847 List<int> rgTempMatchIndices =
new List<int>();
1848 List<float> rgTempMatchOverlaps =
new List<float>();
1851 Match(rgGtBboxes, rgPriorBboxes, nLabel, matchType, fOverlapThreshold, bIgnoreCrossBoundaryBbox, out rgTempMatchIndices, out rgTempMatchOverlaps);
1855 rgMatchIndices[nLabel] = rgTempMatchIndices;
1856 rgMatchOverlaps[nLabel] = rgTempMatchOverlaps;
1861 List<int> rgGtLabels =
new List<int>();
1862 for (
int g = 0; g < rgGtBboxes.Count; g++)
1864 rgGtLabels.Add(rgGtBboxes[g].label);
1868 for (
int c = 0; c < nLocClasses; c++)
1871 if (c == nBackgroundLabelId)
1874 rgMatchIndices[c] = rgTempMatchIndices;
1875 rgMatchOverlaps[c] = rgTempMatchOverlaps;
1877 for (
int m = 0; m < rgTempMatchIndices.Count; m++)
1879 if (rgTempMatchIndices[m] > -1)
1881 int nGtIdx = rgTempMatchIndices[m];
1882 m_log.
CHECK_LT(nGtIdx, rgGtLabels.Count,
"The gt index is larger than the number of gt labels.");
1883 if (c == rgGtLabels[nGtIdx])
1884 rgMatchIndices[c][m] = nGtIdx;
1891 rgAllMatchIndices.Add(rgMatchIndices);
1892 rgAllMatchOverlaps.Add(rgMatchOverlaps);
1904 int nNumMatches = 0;
1906 for (
int i = 0; i < nNum; i++)
1908 Dictionary<int, List<int>> rgMatchIndices = rgAllMatchIndices[i].Map;
1910 foreach (KeyValuePair<
int, List<int>> kv
in rgMatchIndices)
1912 List<int> rgMatchIndex = kv.Value;
1914 for (
int m = 0; m < rgMatchIndex.Count; m++)
1916 if (rgMatchIndex[m] > -1)
1937 if (nMatchIdx == -1 && fMatchOverlap < fNegOverlap)
1966 public int MineHardExamples(
Blob<T> blobConf, List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBBoxes, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances, List<DictionaryMap<List<float>>> rgAllMatchOverlaps,
MultiBoxLossParameter p, List<DictionaryMap<List<int>>> rgAllMatchIndices, List<List<int>> rgAllNegIndices, out
int nNumNegs)
1968 int nNum = rgAllLocPreds.Count;
1973 if (nNumMatches == 0)
1976 int nNumPriors = rgPriorBboxes.Count;
1977 m_log.
CHECK_EQ(nNumPriors, rgrgPriorVariances.Count,
"The number of priors must be the same as the number of prior variances.");
1981 m_log.
CHECK_GE(nNumClasses, 1,
"num_classes should be at least 1.");
1996 float fNmsThreshold = 0;
2009 List<List<float>> rgAllConfLoss =
ComputeConfLoss(rgConfData, nNum, nNumPriors, nNumClasses, nBackgroundLabelId, confLossType, rgAllMatchIndices, rgAllGtBBoxes);
2010 List<List<float>> rgAllLocLoss =
new List<List<float>>();
2019 rgLocShape[1] = nNumMatches * 4;
2020 blobLocPred.
Reshape(rgLocShape);
2021 blobLocGt.
Reshape(rgLocShape);
2022 EncodeLocPrediction(rgAllLocPreds, rgAllGtBBoxes, rgAllMatchIndices, rgPriorBboxes, rgrgPriorVariances, p, blobLocPred, blobLocGt);
2024 rgAllLocLoss =
ComputeLocLoss(blobLocPred, blobLocGt, rgAllMatchIndices, nNum, nNumPriors, locLossType);
2029 for (
int i = 0; i < nNum; i++)
2031 List<float> rgLocLoss =
Utility.
Create<
float>(nNumPriors, 0.0f);
2032 rgAllLocLoss.Add(rgLocLoss);
2036 Stopwatch sw =
new Stopwatch();
2039 for (
int i = 0; i < nNum; i++)
2041 DictionaryMap<List<int>> rgMatchIndices = rgAllMatchIndices[i];
2042 DictionaryMap<List<float>> rgMatchOverlaps = rgAllMatchOverlaps[i];
2045 List<float> rgConfLoss = rgAllConfLoss[i];
2046 List<float> rgLocLoss = rgAllLocLoss[i];
2047 List<float> rgLoss =
new List<float>();
2049 for (
int j = 0; j < rgConfLoss.Count; j++)
2051 rgLoss.Add(rgConfLoss[j] + rgLocLoss[j]);
2055 List<int> rgSelIndices =
new List<int>();
2056 List<int> rgNegIndices =
new List<int>();
2058 foreach (KeyValuePair<
int, List<int>> kv
in rgMatchIndices.Map)
2060 int nLabel = kv.Key;
2064 List<KeyValuePair<float, int>> rgLossIndices =
new List<KeyValuePair<float, int>>();
2066 for (
int m = 0; m < rgMatchIndices[nLabel].Count; m++)
2068 if (
IsEligibleMining(miningType, rgMatchIndices[nLabel][m], rgMatchOverlaps[nLabel][m], fNegOverlap))
2070 rgLossIndices.Add(
new KeyValuePair<float, int>(rgLoss[m], m));
2079 for (
int m = 0; m < rgMatchIndices[nLabel].Count; m++)
2081 if (rgMatchIndices[nLabel][m] > -1)
2085 nNumSel = Math.Min((
int)(nNumPos * fNegPosRatio), nNumSel);
2089 m_log.
CHECK_GT(nSampleSize, 0,
"The sample size must be greater than 0 for HARD_EXAMPLE mining.");
2090 nNumSel = Math.Min(nSampleSize, nNumSel);
2097 List<float> rgSelLoss =
new List<float>();
2098 List<NormalizedBBox> rgSelBoxes =
new List<NormalizedBBox>();
2100 if (bUsePriorForNms)
2102 for (
int m = 0; m < rgMatchIndices[nLabel].Count; m++)
2104 if (
IsEligibleMining(miningType, rgMatchIndices[nLabel][m], rgMatchOverlaps[nLabel][m], fNegOverlap))
2106 rgSelLoss.Add(rgLoss[m]);
2107 rgSelBoxes.Add(rgPriorBboxes[m]);
2114 bool bClipBbox =
false;
2115 List<NormalizedBBox> rgLocBBoxes =
Decode(rgPriorBboxes, rgrgPriorVariances, codeType, bEncodeVarianceInTarget, bClipBbox, rgAllLocPreds[i][nLabel]);
2117 for (
int m = 0; m < rgMatchIndices[nLabel].Count; m++)
2119 if (
IsEligibleMining(miningType, rgMatchIndices[nLabel][m], rgMatchOverlaps[nLabel][m], fNegOverlap))
2121 rgSelLoss.Add(rgLoss[m]);
2122 rgSelBoxes.Add(rgLocBBoxes[m]);
2128 List<int> rgNmsIndices =
ApplyNMS(rgSelBoxes, rgSelLoss, fNmsThreshold, nTopK);
2129 if (rgNmsIndices.Count < nNumSel)
2130 m_log.
WriteLine(
"WARNING: Not enough samples after NMS: " + rgNmsIndices.Count.ToString());
2133 nNumSel = Math.Min(rgNmsIndices.Count, nNumSel);
2134 for (
int n = 0; n < nNumSel; n++)
2136 rgSelIndices.Insert(0, rgLossIndices[rgNmsIndices[n]].Value);
2142 rgLossIndices = rgLossIndices.OrderByDescending(p1 => p1.Key).ToList();
2143 for (
int n = 0; n < nNumSel; n++)
2145 rgSelIndices.Insert(0, rgLossIndices[n].Value);
2150 for (
int m = 0; m < rgMatchIndices[nLabel].Count; m++)
2152 if (rgMatchIndices[nLabel][m] > -1)
2156 rgMatchIndices[nLabel][m] = -1;
2160 else if (rgMatchIndices[nLabel][m] == -1)
2162 if (rgSelIndices.Contains(m))
2164 rgNegIndices.Add(m);
2171 rgAllNegIndices.Add(rgNegIndices);
2173 if (sw.Elapsed.TotalMilliseconds > 1000)
2175 double dfPct = (double)(i+1) / nNum;
2176 m_log.
WriteLine(
"Mining at " + dfPct.ToString(
"P") +
", " + (i+1).ToString(
"N0") +
" of " + nNum.ToString(
"N0") +
"...");
2195 public void EncodeLocPrediction(List<LabelBBox> rgAllLocPreds, DictionaryMap<List<NormalizedBBox>> rgAllGtBboxes, List<DictionaryMap<List<int>>> rgAllMatchIndices, List<NormalizedBBox> rgPriorBboxes, List<List<float>> rgrgPriorVariances,
MultiBoxLossParameter p,
Blob<T> blobLocPred,
Blob<T> blobLocGt)
2197 int nLocPredData = blobLocPred.
count();
2198 float[] rgLocPredData =
new float[nLocPredData];
2199 int nLocGtData = blobLocGt.
count();
2200 float[] rgLocGtData =
new float[nLocGtData];
2202 int nNum = rgAllLocPreds.Count;
2210 for (
int i = 0; i < nNum; i++)
2212 foreach (KeyValuePair<
int, List<int>> kv
in rgAllMatchIndices[i].Map)
2214 int nLabel = kv.Key;
2215 List<int> rgMatchIndex = kv.Value;
2217 m_log.
CHECK(rgAllLocPreds[i].Contains(nLabel),
"The all local pred must contain the label '" + nLabel.ToString() +
"'!");
2218 List<NormalizedBBox> rgLocPred = rgAllLocPreds[i][nLabel];
2220 for (
int j = 0; j < rgMatchIndex.Count; j++)
2222 if (rgMatchIndex[j] <= -1)
2226 int nGtIdx = rgMatchIndex[j];
2227 m_log.
CHECK(rgAllGtBboxes.Map.ContainsKey(i),
"All gt bboxes should contain '" + i.ToString() +
"'!");
2228 m_log.
CHECK_LT(nGtIdx, rgAllGtBboxes[i].Count,
"The ground truth index should be less than the number of ground truths at '" + i.ToString() +
"'!");
2230 m_log.
CHECK_LT(j, rgPriorBboxes.Count,
"The prior bbox count is too small!");
2231 NormalizedBBox gtEncode =
Encode(rgPriorBboxes[j], rgrgPriorVariances[j], codeType, bEncodeVarianceInTarget, gtBbox);
2233 rgLocGtData[nCount * 4 + 0] = gtEncode.
xmin;
2234 rgLocGtData[nCount * 4 + 1] = gtEncode.
ymin;
2235 rgLocGtData[nCount * 4 + 2] = gtEncode.
xmax;
2236 rgLocGtData[nCount * 4 + 3] = gtEncode.
ymax;
2239 m_log.
CHECK_LT(j, rgLocPred.Count,
"The loc pred count is too small!");
2245 if (!bUsePriorForMatching)
2247 bool bClipBbox =
false;
2248 matchBbox =
Decode(rgPriorBboxes[j], rgrgPriorVariances[j], codeType, bEncodeVarianceInTarget, bClipBbox, rgLocPred[j]);
2253 rgLocPredData[nCount * 4 + 0] = (matchBbox.
xmin < 0 || matchBbox.
xmin > 1) ? gtEncode.
xmin : rgLocPred[j].xmin;
2254 rgLocPredData[nCount * 4 + 1] = (matchBbox.
ymin < 0 || matchBbox.
ymin > 1) ? gtEncode.
ymin : rgLocPred[j].ymin;
2255 rgLocPredData[nCount * 4 + 2] = (matchBbox.
xmax < 0 || matchBbox.
xmax > 1) ? gtEncode.
xmax : rgLocPred[j].xmax;
2256 rgLocPredData[nCount * 4 + 3] = (matchBbox.
ymax < 0 || matchBbox.
ymax > 1) ? gtEncode.
ymax : rgLocPred[j].ymax;
2260 rgLocPredData[nCount * 4 + 0] = rgLocPred[j].xmin;
2261 rgLocPredData[nCount * 4 + 1] = rgLocPred[j].ymin;
2262 rgLocPredData[nCount * 4 + 2] = rgLocPred[j].xmax;
2263 rgLocPredData[nCount * 4 + 3] = rgLocPred[j].ymax;
2266 if (bEncodeVarianceInTarget)
2268 for (
int k = 0; k < 4; k++)
2270 m_log.
CHECK_GT(rgrgPriorVariances[j][k], 0,
"The variance at " + j.ToString() +
", " + k.ToString() +
" must be greater than zero.");
2271 rgLocPredData[nCount * 4 + k] /= rgrgPriorVariances[j][k];
2272 rgLocGtData[nCount * 4 + k] /= rgrgPriorVariances[j][k];
2299 int nConfPredData = blobConfPred.
count();
2300 float[] rgConfPredData =
new float[nConfPredData];
2301 int nConfGtData = blobConfGt.
count();
2302 float[] rgConfGtData =
new float[nConfGtData];
2303 int nConfDataOffset = 0;
2304 int nConfGtDataOffset = 0;
2308 m_log.
CHECK_GE(nNumClasses, 1,
"The the num_classes should not be less than 1.");
2312 if (bMapObjectToAgnostic)
2314 if (nBackgroundLabelId >= 0)
2315 m_log.
CHECK_EQ(nNumClasses, 2,
"There should be 2 classes when mapping obect to agnostic with a background label.");
2317 m_log.
CHECK_EQ(nNumClasses, 1,
"There should only b 1 class when mapping object to agnostic with no background label.");
2325 m_log.
WriteLine(
"WARNING: do_neg_mining is depreciated, using mining_type instead.");
2334 for (
int i = 0; i < nNum; i++)
2336 if (rgAllGtBBoxes.Map.ContainsKey(i))
2339 DictionaryMap<List<int>> rgMatchIndicies = rgAllMatchIndices[i];
2341 foreach (KeyValuePair<
int, List<int>> kv
in rgAllMatchIndices[i].Map)
2343 List<int> rgMatchIndex = kv.Value;
2344 m_log.
CHECK_EQ(rgMatchIndex.Count, nNumPriors,
"The match index count should equal the number of priors '" + nNumPriors.ToString() +
"'!");
2346 for (
int j = 0; j < nNumPriors; j++)
2348 if (rgMatchIndex[j] <= -1)
2351 int nGtLabel = (bMapObjectToAgnostic) ? nBackgroundLabelId + 1 : rgAllGtBBoxes[i][rgMatchIndex[j]].label;
2352 int nIdx = (bDoNegMining) ? nCount : j;
2354 switch (confLossType)
2357 rgConfGtData[nConfGtDataOffset + nIdx] = nGtLabel;
2361 rgConfGtData[nConfGtDataOffset + nIdx * nNumClasses + nGtLabel] = 1;
2365 m_log.
FAIL(
"Unknown conf loss type.");
2371 Array.Copy(rgfConfData, nConfDataOffset + j * nNumClasses, rgConfPredData, nCount * nNumClasses, nNumClasses);
2381 for (
int n = 0; n < rgAllNegIndices[i].Count; n++)
2383 int j = rgAllNegIndices[i][n];
2384 m_log.
CHECK_LT(j, nNumPriors,
"The number of priors is too small!");
2386 Array.Copy(rgfConfData, nConfDataOffset + j * nNumClasses, rgConfPredData, nCount * nNumClasses, nNumClasses);
2388 switch (confLossType)
2391 rgConfGtData[nConfGtDataOffset + nCount] = nBackgroundLabelId;
2395 if (nBackgroundLabelId >= 0 && nBackgroundLabelId < nNumClasses)
2396 rgConfGtData[nConfGtDataOffset + nCount * nNumClasses + nBackgroundLabelId] = 1;
2400 m_log.
FAIL(
"Unknown conf loss type.");
2410 nConfDataOffset += nNumPriors * nNumClasses;
2412 nConfGtDataOffset += nNumPriors;
2431 List<List<float>> rgLocAllLoss =
new List<List<float>>();
2432 int nLocCount = blobLocPred.
count();
2433 m_log.
CHECK_EQ(nLocCount, blobLocGt.
count(),
"The loc pred and loc gt must have the same count!");
2434 float[] rgfDiff =
null;
2438 m_blobDiff.ReshapeLike(blobLocPred);
2439 m_cuda.
sub(nLocCount, blobLocPred.
gpu_data, blobLocGt.
gpu_data, m_blobDiff.mutable_gpu_data);
2440 rgfDiff =
Utility.ConvertVecF<T>(m_blobDiff.mutable_cpu_data);
2445 for (
int i = 0; i < nNum; i++)
2447 List<float> rgLocLoss =
Utility.
Create<
float>(nNumPriors, 0.0f);
2449 foreach (KeyValuePair<
int, List<int>> kv
in rgAllMatchIndices[i].Map)
2451 List<int> rgMatchIndex = kv.Value;
2452 m_log.
CHECK_EQ(nNumPriors, rgMatchIndex.Count,
"The match index count at " + i.ToString() +
" is too small.");
2454 for (
int j = 0; j < rgMatchIndex.Count; j++)
2456 if (rgMatchIndex[j] <= -1)
2461 for (
int k = 0; k < 4; k++)
2463 float fVal = rgfDiff[nCount * 4 + k];
2467 float fAbsVal = Math.Abs(fVal);
2470 dfLoss += 0.5 * fVal * fVal;
2472 dfLoss += fAbsVal - 0.5;
2476 dfLoss += 0.5 * fVal * fVal;
2480 m_log.
FAIL(
"Unknown loc loss type!");
2484 rgLocLoss[j] = (float)dfLoss;
2489 rgLocAllLoss.Add(rgLocLoss);
2492 return rgLocAllLoss;
The LabelBBox manages a bounding box used in SSD.
void Add(int nLabel, NormalizedBBox bbox)
Add a new bbox to the label.
The Log class provides general output in text form.
void CHECK(bool b, string str)
Test a flag for true.
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
void FAIL(string str)
Causes a failure which throws an exception with the descriptive text.
void CHECK_EQ(double df1, double df2, string str)
Test whether one number is equal to another.
void CHECK_NE(double df1, double df2, string str)
Test whether one number is not-equal to another.
void CHECK_GT(double df1, double df2, string str)
Test whether one number is greater than another.
void CHECK_LE(double df1, double df2, string str)
Test whether one number is less than or equal to another.
void CHECK_GE(double df1, double df2, string str)
Test whether one number is greater than or equal to another.
void CHECK_LT(double df1, double df2, string str)
Test whether one number is less than another.
The NormalizedBBox manages a bounding box used in SSD.
float ymax
Get/set the y maximum.
float xmax
Get/set the x maximum.
NormalizedBBox Clone()
Return a copy of the object.
float xmin
Get/set the x minimum.
bool difficult
Get/set the difficulty.
float size
Get/set the size.
float ymin
Get/set the y minimum.
void Set(float fxmin, float fymin, float fxmax, float fymax, int? nLabel=null, bool? bDifficult=null, float? fScore=null, float? fSize=null)
Set the values of the NormalizedBbox.
int label
Get/set the label.
The Utility class provides general utility functions.
static List< int > Create(int nCount, int nStart, int nInc)
Create a new List and fill it with values starting with start and incrementing by inc.
static double[] ConvertVec(float[] rgf)
Convert an array of float to an array of generics.
The BBox class processes the NormalizedBBox data used with SSD.
List< NormalizedBBox > GetPrior(float[] rgPriorData, int nNumPriors, out List< List< float > > rgPriorVariances)
Get the prior boundary boxes from the rgPriorData.
float Coverage(NormalizedBBox bbox1, NormalizedBBox bbox2)
Compute the coverage of bbox1 by bbox2.
void FindMatches(List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBboxes, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, MultiBoxLossParameter p, out List< DictionaryMap< List< float > > > rgAllMatchOverlaps, out List< DictionaryMap< List< int > > > rgAllMatchIndices)
Find matches between prediction bboxes and ground truth bboxes.
DictionaryMap< List< NormalizedBBox > > GetGroundTruth(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
Create a set of ground truth bounding boxes from the rgGtData.
NormalizedBBox Clip(NormalizedBBox bbox, float fHeight=1.0f, float fWidth=1.0f)
Clip the BBox to a set range.
bool MeetEmitConstraint(NormalizedBBox src_bbox, NormalizedBBox bbox, EmitConstraint emit_constraint)
Check if a bbox meets the emit constraint w.r.t the src_bbox.
float Size(NormalizedBBox bbox, bool bNormalized=true)
Calculate the size of a BBox.
List< int > ApplyNMS(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fThreshold, int nTopK)
Do non maximum suppression given bboxes and scores.
List< int > CumSum(List< Tuple< float, int > > rgPairs)
Calculate the cumulative sum of a set of pairs.
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type)
Compute the confidence loss for each prior from rgConfData.
BBoxUtility(CudaDnn< T > cuda, Log log)
The constructor.
List< LabelBBox > GetLocPredictions(float[] rgLocData, int nNum, int nNumPredsPerClass, int nNumLocClasses, bool bShareLocation)
Create a set of local predictions from the rgLocData.
bool Project(NormalizedBBox src, NormalizedBBox bbox, out NormalizedBBox proj_bbox)
Project one bbox onto another.
void Dispose()
Clean up all resources.
List< int > ApplyNMS(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fThreshold, int nTopK, bool bReuseOverlaps, out Dictionary< int, Dictionary< int, float > > rgOverlaps)
Do non maximum suppression given bboxes and scores.
List< Dictionary< int, List< float > > > GetConfidenceScores(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses)
Calculate the confidence predictions from rgConfData.
NormalizedBBox Decode(NormalizedBBox prior_bbox, List< float > rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, NormalizedBBox bbox)
Decode a bounding box.
NormalizedBBox Locate(NormalizedBBox srcBbox, NormalizedBBox bbox)
Locate bbox in the coordinate system of the source Bbox.
NormalizedBBox Scale(NormalizedBBox bbox, int nHeight, int nWidth)
Scale the BBox to a set range.
List< LabelBBox > DecodeAll(List< LabelBBox > rgAllLocPreds, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgfPrioVariances, int nNum, bool bShareLocation, int nNumLocClasses, int nBackgroundLabelId, PriorBoxParameter.CodeType codeType, bool bVarianceEncodedInTarget, bool bClip)
Decode all bboxes in a batch.
void ApplyNMSFast(List< NormalizedBBox > rgBBoxes, List< float > rgScores, float fScoreThreshold, float fNmsThreshold, float fEta, int nTopK, out List< int > rgIndices)
Do a fast non maximum suppression given bboxes and scores.
Dictionary< int, Dictionary< int, List< NormalizedBBox > > > GetDetectionResults(float[] rgData, int nNumDet, int nBackgroundLabelId)
Get detection results from rgData.
void Extrapolate(ResizeParameter param, int nHeight, int nWidth, NormalizedBBox crop_bbox, NormalizedBBox bbox)
Extrapolate the transformed bbox if height_scale and width_scale are explicitly provided,...
bool IsEligibleMining(MultiBoxLossParameter.MiningType miningType, int nMatchIdx, float fMatchOverlap, float fNegOverlap)
Returns whether or not mining is eligible given the mining type and match index.
List< NormalizedBBox > Decode(List< NormalizedBBox > rgPriorBbox, List< List< float > > rgrgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, bool bClip, List< NormalizedBBox > rgBbox)
Decode a set of bounding box.
bool IsCrossBoundary(NormalizedBBox bbox)
Returns whether or not the bbox overlaps outside the range [0,1].
List< List< float > > ComputeLocLoss(Blob< T > blobLocPred, Blob< T > blobLocGt, List< DictionaryMap< List< int > > > rgAllMatchIndices, int nNum, int nNumPriors, MultiBoxLossParameter.LocLossType lossType)
Compute the localization loss per matched prior.
Dictionary< int, LabelBBox > GetGroundTruthEx(float[] rgGtData, int nNumGt, int nBackgroundLabelId, bool bUseDifficultGt)
Create a set of ground truth bounding boxes from the rgGtData.
float JaccardOverlap(NormalizedBBox bbox1, NormalizedBBox bbox2, bool bNormalized=true)
Calculates the Jaccard overlap between two bounding boxes.
NormalizedBBox Intersect(NormalizedBBox bbox1, NormalizedBBox bbox2)
Create the intersection of two bounding boxes.
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List< DictionaryMap< List< int > > > rgAllMatchIndices, DictionaryMap< List< NormalizedBBox > > rgAllGtBoxes)
Compute the confidence loss for each prior from rgConfData.
void EncodeLocPrediction(List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBboxes, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, MultiBoxLossParameter p, Blob< T > blobLocPred, Blob< T > blobLocGt)
Encode the localization prediction and ground truth for each matched prior.
int MineHardExamples(Blob< T > blobConf, List< LabelBBox > rgAllLocPreds, DictionaryMap< List< NormalizedBBox > > rgAllGtBBoxes, List< NormalizedBBox > rgPriorBboxes, List< List< float > > rgrgPriorVariances, List< DictionaryMap< List< float > > > rgAllMatchOverlaps, MultiBoxLossParameter p, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< List< int > > rgAllNegIndices, out int nNumNegs)
Mine the hard examples from the batch.
void EncodeConfPrediction(float[] rgfConfData, int nNum, int nNumPriors, MultiBoxLossParameter p, List< DictionaryMap< List< int > > > rgAllMatchIndices, List< List< int > > rgAllNegIndices, DictionaryMap< List< NormalizedBBox > > rgAllGtBBoxes, Blob< T > blobConfPred, Blob< T > blobConfGt)
Encode the confidence predictions and ground truth for each matched prior.
float ComputeAP(List< Tuple< float, int > > rgTp, int nNumPos, List< Tuple< float, int > > rgFp, ApVersion apVersion, out List< float > rgPrec, out List< float > rgRec)
Compute the average precision given true positive and false positive vectors.
NormalizedBBox Encode(NormalizedBBox prior_bbox, List< float > rgfPriorVariance, PriorBoxParameter.CodeType code_type, bool bEncodeVarianceInTarget, NormalizedBBox bbox)
Encode a bounding box.
void Match(List< NormalizedBBox > rgGtBboxes, List< NormalizedBBox > rgPredBboxes, int nLabel, MultiBoxLossParameter.MatchType match_type, float fOverlapThreshold, bool bIgnoreCrossBoundaryBbox, out List< int > rgMatchIndices, out List< float > rgMatchOverlaps)
Find matches between a list of two bounding boxes.
NormalizedBBox Output(NormalizedBBox bbox, SizeF szImg, ResizeParameter p)
Output the predicted bbox on the actual image.
List< List< float > > ComputeConfLoss(float[] rgConfData, int nNum, int nNumPredsPerClass, int nNumClasses, int nBackgroundLabelId, MultiBoxLossParameter.ConfLossType loss_type, List< Dictionary< int, List< int > > > rgAllMatchIndices, Dictionary< int, List< NormalizedBBox > > rgAllGtBoxes)
Compute the confidence loss for each prior from rgConfData.
int CountNumMatches(List< DictionaryMap< List< int > > > rgAllMatchIndices, int nNum)
Counts the number of matches in the list of maps.
The Blob is the main holder of data that moves through the Layers of the Net.
T[] mutable_cpu_data
Get data from the GPU and bring it over to the host, or Set data from the Host and send it over to th...
void Reshape(int nNum, int nChannels, int nHeight, int nWidth, bool? bUseHalfSize=null)
DEPRECATED; use
int count()
Returns the total number of items in the Blob.
long gpu_data
Returns the data GPU handle used by the CudaDnn connection.
The CudaDnn object is the main interface to the Low-Level Cuda C++ DLL.
void sub(int n, long hA, long hB, long hY, int nAOff=0, int nBOff=0, int nYOff=0, int nB=0)
Subtracts B from A and places the result in Y.
bool Active
When active, the parameter is used, otherwise it is ignored.
Specifies the parameters for the EmitConstraint used with SSD.
EmitType emit_type
Get/set the emit type.
float emit_overlap
Get/set the emit overlap used with MIN_OVERLAP.
EmitType
Specifies the emit type.
Specifies the parameters for the MultiBoxLossParameter.
float overlap_threshold
Get/set the overlap threshold (default = 0.5).
MatchType
Defines the matching method used during training.
PriorBoxParameter.CodeType code_type
Get/set the coding method for the bounding box.
float neg_overlap
Get/set the negative overlap upperbound for the unmatched predictions (default = 0....
LocLossType
Defines the localization loss types.
int sample_size
Get/set the number of samples (default = 64).
float neg_pos_ratio
Get/set the negative/positive ratio (default = 3.0).
bool share_location
Get/set whether or not the bounding box is shared among different classes (default = true).
MiningType
Defines the mining type used during training.
NonMaximumSuppressionParameter nms_param
Get/set the parameters used for the non maximum suppression during hard example training.
LocLossType loc_loss_type
Get/set the localization loss type (default = SMOOTH_L1).
uint background_label_id
Get/set the background label id.
ConfLossType
Defines the confidence loss types.
bool encode_variance_in_target
Get/set whether or not to encode the variance of the prior box in the loc loss target instead of in t...
bool ignore_cross_boundary_bbox
Get/set whether or not to ignore cross boundary bbox during matching (default = false)....
bool map_object_to_agnostic
Get/set whether or not to map all object classes to an agnostic class (default = false)....
bool? do_neg_mining
DEPRECATED: Get/set whether or not to perform negative mining (default = false).
bool bp_inside
Get/set whether or not to only backpropagate on corners which are inside of the image region when enc...
ConfLossType conf_loss_type
Get/set the confidence loss type (default = SOFTMAX).
bool use_prior_for_matching
Get/set whether or not to use prior for matching.
uint num_classes
Get/set the number of classes to be predicted - required!
MiningType mining_type
Get/set the mining type used during training (default = MAX_NEGATIVE).
bool use_prior_for_nms
Get/set whether or not to use the prior bbox for nms.
MatchType match_type
Get/set the matching method used during training (default = PER_PREDICTION).
int? top_k
Get/set the maximum number of results kept.
float nms_threshold
Get/set the threshold to be used in nms.
Specifies the parameters for the PriorBoxParameter.
CodeType
Defines the encode/decode type.
Specifies the parameters for the ResizeParameter for use with SSD.
uint height
Get/set the resizing height.
uint width
Get/set the resizing width.
ResizeMode
Defines the resizing mode.
ResizeMode resize_mode
Get/set the resizing mode.
uint width_scale
Get/set the resizing width scale used with FIT_SMALL_SIZE mode.
uint height_scale
Get/set the resizing height scale used with FIT_SMALL_SIZE mode.
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
ApVersion
Defines the different way of computing average precision.
The MyCaffe.common namespace contains common MyCaffe classes.
The MyCaffe.param.ssd namespace contains all SSD related parameter objects that correspond to the nat...
The MyCaffe.param namespace contains parameters used to create models.
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...