diff --git a/modules/tracking/CMakeLists.txt b/modules/tracking/CMakeLists.txt index 35a9136161b..d8f20edfad7 100644 --- a/modules/tracking/CMakeLists.txt +++ b/modules/tracking/CMakeLists.txt @@ -1,2 +1,2 @@ set(the_description "Tracking API") -ocv_define_module(tracking opencv_imgproc opencv_core opencv_video opencv_highgui OPTIONAL opencv_datasets WRAP python) +ocv_define_module(tracking opencv_imgproc opencv_core opencv_video opencv_highgui opencv_plot OPTIONAL opencv_datasets WRAP python) diff --git a/modules/tracking/samples/benchmark.cpp b/modules/tracking/samples/benchmark.cpp index 3fb8c9971d3..16e49562394 100644 --- a/modules/tracking/samples/benchmark.cpp +++ b/modules/tracking/samples/benchmark.cpp @@ -1,415 +1,355 @@ -#include -#include -#include -#include +#include "opencv2/core/utility.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/tracking.hpp" +#include "opencv2/videoio.hpp" +#include "opencv2/plot.hpp" +#include +#include #include -#include -#include -#include - -const int CMDLINEMAX = 30; - int ASSESS_TILL = INT_MAX; -const int LINEMAX = 40; using namespace std; using namespace cv; -/* TODO: - do normalization ala Kalal's assessment protocol for TLD - */ - -static Mat image; -static bool paused; -static bool saveImageKey; -static vector palette; - -void print_table(char* videos[],int videoNum,char* algorithms[],int algNum,const vector >& results,char* tableName); - -static int lineToRect(char* line,Rect2d& res){ - char * ptr=line,*pos=ptr; - if(line==NULL || line[0]=='\0'){ - return -1; - } - if(strcmp(line,"NaN,NaN,NaN,NaN\n")==0){ - res.height=res.width=-1.0; - return 0; - } - - double nums[4]={0}; - for(int i=0; i<4 && (ptr=strpbrk(ptr,"0123456789-"))!= NULL;i++,ptr=pos){ - nums[i]=strtod(ptr,&pos); - if(pos==ptr){ - printf("lineToRect had problems with decoding line %s\n",line); - return -1; - } - } - res.x=cv::min(nums[0],nums[2]); - res.y=cv::min(nums[1],nums[3]); - res.width=cv::abs(nums[0]-nums[2]); - res.height=cv::abs(nums[1]-nums[3]); - return 0; -} -static inline double overlap(Rect2d r1,Rect2d r2){ - if(r1.width<0 || r2.width<0 || r1.height<0 || r1.width<0)return -1.0; - double a1=r1.area(), a2=r2.area(), a0=(r1&r2).area(); - return a0/(a1+a2-a0); -} -static void help(){ - cout << "\nThis example shows the functionality of \"Long-term optical tracking API\"" - "-- pause video [p] and draw a bounding box around the target to start the tracker\n" - "Example of is in opencv_extra/testdata/cv/tracking/\n" - "Call:\n" - "./tracker [] ...\n" - << endl; - - cout << "\n\nConsole keys: \n" - "\t-s - save images\n" - "\t-l=100 - assess only, say, first 100 frames\n"; - - cout << "\n\nHot keys: \n" - "\tq - quit the program\n" - "\tp - pause video\n"; - exit(EXIT_SUCCESS); +// TODO: do normalization ala Kalal's assessment protocol for TLD + +static const Scalar gtColor = Scalar(0, 255, 0); + +static Scalar getNextColor() +{ + const int num = 6; + static Scalar colors[num] = {Scalar(160, 0, 0), Scalar(0, 0, 160), Scalar(0, 160, 160), + Scalar(160, 160, 0), Scalar(160, 0, 160), Scalar(20, 50, 160)}; + static int id = 0; + return colors[id < num ? id++ : num - 1]; } -static void parseCommandLineArgs(int argc, char** argv,char* videos[],char* gts[], - int* vc,char* algorithms[],char* initBoxes[][CMDLINEMAX],int* ac,char keys[CMDLINEMAX][LINEMAX]){ - - *ac=*vc=0; - for(int i=1;i> one.y; + input.ignore(numeric_limits::max(), ','); + input >> one.width; + input.ignore(numeric_limits::max(), ','); + input >> one.height; + input.ignore(numeric_limits::max(), '\n'); + if (input.good()) + res.push_back(one); + } + } + if (!omitname.empty()) + { + ifstream input(omitname.c_str()); + if (!input.is_open()) + CV_Error(Error::StsError, "Failed to open file"); + while (input) + { + unsigned int a = 0, b = 0; + input >> a >> b; + input.ignore(numeric_limits::max(), '\n'); + if (a > 0 && b > 0 && a < res.size() && b < res.size()) + { + if (a > b) + swap(a, b); + for (vector::iterator i = res.begin() + a; i != res.begin() + b; ++i) { - if( ptr == NULL ) - keys[j][0]='\0'; - else - strcpy(keys[j], ptr+1); + *i = Rect2d(); } } - continue; - } - bool isVideo=false; - for(int j=0,len=(int)strlen(argv[i]);j 0. && box.height > 0.; } +const int LTRC_COUNT = 100; + +struct AlgoWrap +{ + AlgoWrap(const string &name_) + : tracker(Tracker::create(name_)), lastState(NotFound), name(name_), color(getNextColor()), + numTotal(0), numResponse(0), numPresent(0), numCorrect_0(0), numCorrect_0_5(0), + timeTotal(0), auc(LTRC_COUNT + 1, 0) + { + } + + enum State + { + NotFound, + Overlap_None, + Overlap_0, + Overlap_0_5, + }; + + Ptr tracker; + bool lastRes; + Rect2d lastBox; + State lastState; + + // visual + string name; + Scalar color; + + // results + int numTotal; // frames passed to tracker + int numResponse; // frames where tracker had response + int numPresent; // frames where ground truth result present + int numCorrect_0; // frames where overlap with GT > 0 + int numCorrect_0_5; // frames where overlap with GT > 0.5 + int64 timeTotal; // ticks + vector auc; // number of frames for each overlap percent + + void eval(const Mat &frame, const Rect2d >Box, bool isVerbose) + { + // RUN + lastBox = Rect2d(); + int64 frameTime = getTickCount(); + lastRes = tracker->update(frame, lastBox); + frameTime = getTickCount() - frameTime; + + // RESULTS + double intersectArea = (gtBox & lastBox).area(); + double unionArea = (gtBox | lastBox).area(); + numTotal++; + numResponse += (lastRes && isGoodBox(lastBox)) ? 1 : 0; + numPresent += isGoodBox(gtBox) ? 1 : 0; + double overlap = unionArea > 0. ? intersectArea / unionArea : 0.; + numCorrect_0 += overlap > 0. ? 1 : 0; + numCorrect_0_5 += overlap > 0.5 ? 1 : 0; + auc[std::min(std::max((size_t)(overlap * LTRC_COUNT), (size_t)0), (size_t)LTRC_COUNT)]++; + timeTotal += frameTime; + + if (isVerbose) + cout << name << " - " << overlap << endl; + + if (isGoodBox(gtBox) != isGoodBox(lastBox)) lastState = NotFound; + else if (overlap > 0.5) lastState = Overlap_0_5; + else if (overlap > 0.0001) lastState = Overlap_0; + else lastState = Overlap_None; + } + + void draw(Mat &image, const Point &textPoint) const + { + if (lastRes) + rectangle(image, lastBox, color, 2, LINE_8); + string suf; + switch (lastState) + { + case AlgoWrap::NotFound: suf = " X"; break; + case AlgoWrap::Overlap_None: suf = " ~"; break; + case AlgoWrap::Overlap_0: suf = " +"; break; + case AlgoWrap::Overlap_0_5: suf = " ++"; break; } + putText(image, name + suf, textPoint, FONT_HERSHEY_PLAIN, 1, color, 1, LINE_AA); } -} -void print_table(char* videos[],int videoNum,char* algorithms[],int algNum,const vector >& results,char* tableName){ - printf("\n%s",tableName); - vector grid(1+algNum,0); - char spaces[100];memset(spaces,' ',100); - for(int i=0;i p_ = plot::createPlot2d(getLTRC()); + p_->render(img); } - printf("%.*s ",(int)grid[0],spaces); - for(int i=0;i > >results; -}; -class CorrectFrames : public AssessmentRes::Assessment{ -public: - CorrectFrames(double tol):tol_(tol),len_(1),correctFrames_(1){} - int printf(char* buf){return sprintf(buf,"%d/%d",correctFrames_,len_);} - int printName(char* buf){return sprintf(buf,(char*)"Num of correct frames (overlap>%g)\n",tol_);} - void assess(const Rect2d& ethalon,const Rect2d& res){len_++;if(overlap(ethalon,res)>tol_)correctFrames_++;} -private: - double tol_; - int len_; - int correctFrames_; -}; -class AvgTime : public AssessmentRes::Assessment{ -public: - AvgTime(double res):res_(res){} - int printf(char* buf){return sprintf(buf,"%gms",res_);} - int printName(char* buf){return sprintf(buf,(char*)"Average frame tracking time\n");} - void assess(const Rect2d& /*ethalon*/,const Rect2d&/* res*/){}; -private: - double res_; -}; -class PRF : public AssessmentRes::Assessment{ -public: - PRF():occurences_(0),responses_(0),true_responses_(0){}; - int printName(char* buf){return sprintf(buf,(char*)"PRF\n");} - int printf(char* buf){return sprintf(buf,"%g/%g/%g",(1.0*true_responses_)/responses_,(1.0*true_responses_)/occurences_, - (2.0*true_responses_)/(responses_+occurences_));} - void assess(const Rect2d& ethalon,const Rect2d& res){ - if(res.height>=0)responses_++; - if(ethalon.height>=0)occurences_++; - if(ethalon.height>=0 && res.height>=0)true_responses_++; + void stat(ostream &out) const + { + out << name << endl; + out << setw(20) << "Overlap > 0 " << setw(20) << (double)numCorrect_0 / numTotal * 100 + << "%" << setw(20) << numCorrect_0 << endl; + out << setw(20) << "Overlap > 0.5" << setw(20) << (double)numCorrect_0_5 / numTotal * 100 + << "%" << setw(20) << numCorrect_0_5 << endl; + + double p = (double)numCorrect_0_5 / numResponse; + double r = (double)numCorrect_0_5 / numPresent; + double f = 2 * p * r / (p + r); + out << setw(20) << "Precision" << setw(20) << p * 100 << "%" << endl; + out << setw(20) << "Recall " << setw(20) << r * 100 << "%" << endl; + out << setw(20) << "f-measure" << setw(20) << f * 100 << "%" << endl; + out << setw(20) << "AUC" << setw(20) << calcAUC() << endl; + + double s = (timeTotal / getTickFrequency()) / numTotal; + out << setw(20) << "Performance" << setw(20) << s * 1000 << " ms/frame" << setw(20) << 1 / s + << " fps" << endl; } -private: - int occurences_,responses_,true_responses_; }; -AssessmentRes::AssessmentRes(int algnum):len(0),results(algnum){ - for(int i=0;i<(int)results.size();i++){ - results[i].push_back(Ptr(new CorrectFrames(0.0))); - results[i].push_back(Ptr(new CorrectFrames(0.5))); - results[i].push_back(Ptr(new PRF())); + +inline ostream &operator<<(ostream &out, const AlgoWrap &w) { w.stat(out); return out; } + +inline vector initAlgorithms(const string &algList) +{ + vector res; + istringstream input(algList); + for (;;) + { + char one[30]; + input.getline(one, 30, ','); + if (!input) + break; + cout << " " << one << " - "; + AlgoWrap a(one); + if (a.tracker) + { + res.push_back(a); + cout << "OK"; + } + else + { + cout << "FAILED"; + } + cout << endl; } + return res; } -static AssessmentRes assessment(char* video,char* gt_str, char* algorithms[],char* initBoxes_str[],int algnum){ - char buf[200]; - int start_frame=0; - int linecount=0; - Rect2d boundingBox; - vector averageMillisPerFrame(algnum,0.0); - static int videoNum=0; - videoNum++; - - FILE* gt=fopen(gt_str,"r"); - if(gt==NULL){ - printf("cannot open the ground truth file %s\n",gt_str); - exit(EXIT_FAILURE); - } - for(linecount=0;fgets(buf,sizeof(buf),gt)!=NULL;linecount++); - if(linecount==0){ - printf("ground truth file %s has no lines\n",gt_str); - exit(EXIT_FAILURE); - } - fseek(gt,0,SEEK_SET); - if(fgets(buf,sizeof(buf),gt)==NULL){ - printf("ground truth file %s has no lines\n",gt_str); - exit(EXIT_FAILURE); - } - - std::vector initBoxes(algnum); - for(int i=0;i >trackers(algnum); - for(int i=0;i,,,')}" + "{start|0|starting frame}" + "{num|0|frame number (0 for all)}" + "{omit||file with omit ranges (each line describes occluded frames: ' ')}" + "{plot|false|plot LTR curves at the end}" + "{v|false|print each frame info}" + "{@algos||comma-separated algorithm names}"; + CommandLineParser p(argc, argv, keys); + if (p.has("help")) + { + p.printMessage(); + return 0; + } + int startFrame = p.get("start"); + int frameCount = p.get("num"); + string videoFile = p.get("video"); + string gtFile = p.get("gt"); + string omitFile = p.get("omit"); + string algList = p.get("@algos"); + bool doPlot = p.get("plot"); + bool isVerbose = p.get("v"); + if (!p.check()) + { + p.printErrors(); + return 0; + } + + cout << "Reading GT from " << gtFile << " ... "; + vector gt = readGT(gtFile, omitFile); + if (gt.empty()) + CV_Error(Error::StsError, "Failed to read GT file"); + cout << gt.size() << " boxes" << endl; + + cout << "Opening video " << videoFile << " ... "; + VideoCapture cap; + cap.open(videoFile); + if (!cap.isOpened()) + CV_Error(Error::StsError, "Failed to open video file"); + cap.set(CAP_PROP_POS_FRAMES, startFrame); + cout << "at frame " << startFrame << endl; + + // INIT + vector algos = initAlgorithms(algList); + Mat frame, image; + cap >> frame; + for (vector::iterator i = algos.begin(); i != algos.end(); ++i) + i->tracker->init(frame, gt[0]); + + // DRAW + { + namedWindow(window, WINDOW_AUTOSIZE); + frame.copyTo(image); + rectangle(image, gt[0], gtColor, 2, LINE_8); + imshow(window, image); + } + + bool paused = false; + int frameId = 0; + cout << "Hot keys:" << endl << " q - exit" << endl << " p - pause" << endl; + for (;;) + { + if (!paused) + { + cap >> frame; + if (frame.empty()) + { + cout << "Done - video end" << endl; + break; + } + frameId++; + if (isVerbose) + cout << endl << "Frame " << frameId << endl; + // EVAL + for (vector::iterator i = algos.begin(); i != algos.end(); ++i) + i->eval(frame, gt[frameId], isVerbose); + // DRAW + { + Point textPoint(1, 16); + frame.copyTo(image); + rectangle(image, gt[frameId], gtColor, 2, LINE_8); + putText(image, "GROUND TRUTH", textPoint, FONT_HERSHEY_PLAIN, 1, gtColor, 1, LINE_AA); + for (vector::iterator i = algos.begin(); i != algos.end(); ++i) + { + textPoint.y += 14; + i->draw(image, textPoint); + } + imshow(window, image); + } } - exit(EXIT_FAILURE); - } - } - - cap >> frame; - frame.copyTo( image ); - if(lineToRect(buf,boundingBox)<0){ - if(gt!=NULL){ - fclose(gt); - } - exit(EXIT_FAILURE); - } - rectangle( image, boundingBox,palette[0], 2, 1 ); - for(int i=0;i<(int)trackers.size();i++){ - rectangle(image,initBoxes[i],palette[i+1], 2, 1 ); - if( !trackers[i]->init( frame, initBoxes[i] ) ){ - printf("could not initialize tracker %s with box %s at video %s\n",algorithms[i],initBoxes_str[i],video); - if(gt!=NULL){ - fclose(gt); + + char c = (char)waitKey(1); + if (c == 'q') + { + cout << "Done - manual exit" << endl; + break; } - exit(EXIT_FAILURE); - } - } - imshow( "Tracking API", image ); - - int frameCounter = 0; - AssessmentRes res((int)trackers.size()); - - for ( ;; ){ - if( !paused ){ - cap >> frame; - if(frame.empty()){ - break; - } - frame.copyTo( image ); - - if(fgets(buf,sizeof(buf),gt)==NULL){ - printf("ground truth is over\n"); - break; - } - if(lineToRect(buf,boundingBox)<0){ - if(gt!=NULL){ - fclose(gt); - } - exit(EXIT_FAILURE); - } - rectangle( image, boundingBox,palette[0], 2, 1 ); - putText(image, "GROUND TRUTH", Point(1,16 + 0*14), FONT_HERSHEY_SIMPLEX, 0.5, palette[0],2); - - frameCounter++; - for(int i=0;i<(int)trackers.size();i++){ - bool trackerRes=true; - clock_t start;start=clock(); - trackerRes=trackers[i]->update( frame, initBoxes[i] ); - start=clock()-start; - averageMillisPerFrame[i]+=1000.0*start/CLOCKS_PER_SEC; - if( trackerRes == false ) - { - initBoxes[i].height=initBoxes[i].width=-1.0; - } - else - { - rectangle( image, initBoxes[i], palette[i+1], 2, 1 ); - putText(image, algorithms[i], Point(1,16 + (i+1)*14), FONT_HERSHEY_SIMPLEX, 0.5, palette[i+1],2); - } - for(int j=0;j<(int)res.results[i].size();j++) - res.results[i][j]->assess(boundingBox,initBoxes[i]); - } - imshow( "Tracking API", image ); - if(saveImageKey){ - char inbuf[LINEMAX]; - sprintf(inbuf,"image%d_%d.jpg",videoNum,frameCounter); - imwrite(inbuf,image); - } - - if((frameCounter+1)>=ASSESS_TILL){ - break; - } - - char c = (char) waitKey( 2 ); - if( c == 'q' ) - break; - if( c == 'p' ) - paused = !paused; - } - } - if(gt!=NULL){ - fclose(gt); - } - destroyWindow( "Tracking API"); - - res.len=linecount; - res.videoName=video; - for(int i=0;i<(int)res.results.size();i++) - res.results[i].push_back(Ptr(new AvgTime(averageMillisPerFrame[i]/res.len))); - return res; -} + else if (c == 'p') + { + paused = !paused; + } + if (frameCount && frameId >= frameCount) + { + cout << "Done - max frame count" << endl; + break; + } + } + + // STAT + for (vector::iterator i = algos.begin(); i != algos.end(); ++i) + cout << "==========" << endl << *i << endl; + + if (doPlot) + { + Mat img(300, 300, CV_8UC3); + for (vector::iterator i = algos.begin(); i != algos.end(); ++i) + { + i->plotLTRC(img); + imshow("LTR curve for " + i->name, img); + } + waitKey(0); + } -int main( int argc, char** argv ){ - palette.push_back(Scalar(255,0,0));//BGR, blue - palette.push_back(Scalar(0,0,255));//red - palette.push_back(Scalar(0,255,255));//yellow - palette.push_back(Scalar(255,255,0));//orange - int vcount=0,acount=0; - char* videos[CMDLINEMAX],*gts[CMDLINEMAX],*algorithms[CMDLINEMAX],*initBoxes[CMDLINEMAX][CMDLINEMAX]; - char keys[CMDLINEMAX][LINEMAX]; - strcpy(keys[0],"-s"); - strcpy(keys[1],"-a"); - - parseCommandLineArgs(argc,argv,videos,gts,&vcount,algorithms,initBoxes,&acount,keys); - - saveImageKey=(keys[0][0]=='\0'); - if( strcmp(keys[1],"-a") != 0 ) - ASSESS_TILL = atoi(keys[1]); - else - ASSESS_TILL = INT_MAX; - - CV_Assert(acount results; - for(int i=0;i > resultStrings(vcount); - vector nameStrings; - for(int i=0;iprintName(nameStrings[tableCount])printf(resultStrings[videoCount][algoCount]); - } - print_table(videos,vcount,algorithms,acount,resultStrings,nameStrings[tableCount]); - } - return 0; + return 0; }