Cntk: Evaluate in Visual C++

Created on 2 Oct 2017  ·  4 Comments  ·  Source: microsoft/CNTK

I created the model file faster_rcnn_eval_VGG16_e2e_native.model using Python.
Now I want to run detection from C++. I can successfully load the model with "CNTK::Function::Load".

I wrote the following code based on the Java example.

#include<algorithm>
#include<iostream>
#include<stdexcept>
#include<string>
#include<unordered_map>
#include<vector>
#include<opencv2/opencv.hpp>
#include"CNTKLibrary.h"
#include"CNTKLibraryInternals.h"
// Convenience wrappers, declared inside namespace CNTK, that forward to the
// float instantiation of CNTK::Value::Create.
namespace CNTK {
    // Forwards sampleShape, sequences, device and readOnly unchanged to
    // CNTK::Value::Create<float> and returns the resulting ValuePtr.
    // readOnly defaults to false.
    static CNTK::ValuePtr CreateDenseFloat(const CNTK::NDShape& sampleShape, const std::vector<std::vector<float>>& sequences,
                                           const CNTK::DeviceDescriptor& device, bool readOnly = false) {
        return CNTK::Value::Create<float>(sampleShape, sequences, device, readOnly);
    }
    // Same as the overload above, but additionally forwards sequenceStartFlags
    // to the corresponding CNTK::Value::Create<float> overload.
    static CNTK::ValuePtr CreateDenseFloat(const CNTK::NDShape& sampleShape, const std::vector<std::vector<float>>& sequences,
                                           const std::vector<bool>& sequenceStartFlags, const CNTK::DeviceDescriptor& device, bool readOnly = false) {
        return CNTK::Value::Create<float>(sampleShape, sequences, sequenceStartFlags, device, readOnly);
    }
}
// Loads the image at `path`, scales it (nearest-neighbour) so that its longer
// side matches the corresponding target dimension while the other side is
// scaled by the same ratio, then pads it with a constant colour up to
// width x height, centring the scaled image.
//
// path       - file to read with cv::imread.
// width      - target canvas width in pixels.
// height     - target canvas height in pixels.
// pad_value  - value used for all three channels of the padding colour.
//
// Returns the padded image.
// Throws std::runtime_error when the image cannot be read; cv::imread signals
// failure by returning an empty Mat, which would otherwise lead to a division
// by zero in the aspect-ratio computation below.
cv::Mat LoadResizeAndPad(std::string path, int width, int height, int pad_value = 114) {
    cv::Mat img = cv::imread(path);
    if (img.empty()) {
        throw std::runtime_error("LoadResizeAndPad: could not read image '" + path + "'");
    }

    // Scale the longer side to the target and keep the aspect ratio for the other.
    int target_w = width;
    int target_h = height;
    if (img.cols > img.rows) {
        target_h = img.rows*width / img.cols;
    } else {
        target_w = img.cols*height / img.rows;
    }
    cv::resize(img, img, cv::Size(target_w, target_h), 0, 0, cv::INTER_NEAREST);

    // Split the remaining space so the scaled image sits in the middle; the
    // bottom/right borders absorb any odd pixel left over by the division.
    int top = std::max(0, (height - target_h) / 2);
    int left = std::max(0, (width - target_w) / 2);
    int bottom = height - top - target_h;
    int right = width - left - target_w;
    cv::copyMakeBorder(img, img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(pad_value, pad_value, pad_value));
    return img;
}
//https://github.com/Microsoft/CNTK/blob/release/2.2/Tests/EndToEndTests/EvalClientTests/JavaEvalTest/src/Main.java
//https://github.com/Microsoft/CNTK/blob/master/bindings/common/CNTKValueExtend.i
int main() {
    try {
        CNTK::DeviceDescriptor device = CNTK::DeviceDescriptor::UseDefaultDevice();
        std::wstring model_path = L"../../faster_rcnn_eval_VGG16_e2e_native.model";
        CNTK::FunctionPtr model = CNTK::Function::Load(model_path, device);
        for (auto&e : model->Arguments()) {
            std::wcout << e.AsString() << std::endl;
        }
        for (auto&e : model->Outputs()) {
            std::wcout << e.AsString() << std::endl;
        }

        CNTK::Variable input_var = model->Arguments()[0];
        CNTK::Variable output_var = model->Outputs()[0];

        CNTK::NDShape input_shape = input_var.Shape();
        int image_width = input_shape.Dimensions()[0];
        int image_height = input_shape.Dimensions()[1];
        int image_channel = input_shape.Dimensions()[2];
        int image_size = input_shape.TotalSize();

        cv::Mat img = LoadResizeAndPad("C:/Users/spring/Desktop/FasterRCNN_folder/05/CNTK/PretrainedModels/CNTK-Samples-2-2/Examples/Image/DataSets/MyDataSet/positive/0019.jpg", image_width, image_height);
        std::vector<float> float_vec;
        for (int c = 0; c < img.channels(); c++) {
            for (int h = 0; h < img.rows; h++) {
                for (int w = 0; w < img.cols; w++) {
                    cv::Scalar color = img.at<cv::Vec3b>(h, w);
                    if (c == 0) {
                        float_vec.push_back(color[0]);
                    } else if (c == 1) {
                        float_vec.push_back(color[1]);
                    } else {
                        float_vec.push_back(color[2]);
                    }
                }
            }
        }
        std::vector<std::vector<float>> float_vec2;
        float_vec2.push_back(float_vec);
        CNTK::ValuePtr input_val = CNTK::CreateDenseFloat(input_shape, float_vec2, device);

        std::unordered_map<CNTK::Variable, CNTK::ValuePtr> input_data_map;
        input_data_map.insert(std::make_pair(input_var, input_val));

        std::unordered_map<CNTK::Variable, CNTK::ValuePtr> output_data_map;
        output_data_map.insert(std::make_pair(output_var, nullptr));

        model->Evaluate(input_data_map, output_data_map);

        std::vector<std::vector<float>> output_buffer;
        output_data_map[output_var].get()->CopyVariableValueTo<float>(input_var, output_buffer);

    } catch (std::exception &e) {
        std::cerr << e.what() << std::endl;
    }
    return 0;
}

But this code fails with an error.

Please, help me.

Most helpful comment

to add to my post above, once I call evaluate, I extract the output like so:

// One buffer per model output; each is filled by the matching
// CopyVariableValueTo call below.
std::vector< std::vector<float> > vClsPred;
std::vector< std::vector<float> > vROIS;
std::vector< std::vector<float> > vbboxRegr;
// Each output Value is unpacked using the same output variable it was stored under.
// `outputs` and `outputVariables` are defined outside this snippet.
outputs[outputVariables[0]]->CopyVariableValueTo(outputVariables[0], vClsPred);//cls_pred - the class probabilities for each ROI
outputs[outputVariables[1]]->CopyVariableValueTo(outputVariables[1], vROIS);//rpn_rois - the absolute pixel coordinates of the candidate rois
outputs[outputVariables[2]]->CopyVariableValueTo(outputVariables[2], vbboxRegr);//bbox_regr - the regression coefficients per class for each ROI

All 4 comments

You are only adding one input, but there are two: the image, and a second input containing 6 values. I got this from looking at the Python scripts in the CNTK example. You also need to prepare the outputs. The code below is what I have, and it seems to work OK.

    //Second Input: six image-dimension values, pushed in the order listed below
    std::vector<float> vInput2;
    //from the python script
    //# dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height
    //dims = (self._pad_width, self._pad_height, target_w, target_h, img_width, img_height)
    //img_dims - (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
        //as an example, the values in comments are for an image of w=752,h=610, padded to the required 850x850
    vInput2.push_back(D.m_nPadWidth);//850
    vInput2.push_back(D.m_nPadHeight);//850
    vInput2.push_back(D.m_nTargetWidth);//850
    vInput2.push_back(D.m_nTargetHeight);//689
    vInput2.push_back(D.m_nOriginalWidth);//752
    vInput2.push_back(D.m_nOriginalHeight);//610
    //wrap the six values in a Value shaped like the second input variable
    ValuePtr inputValue2 = CNTK::Value::CreateBatch(inputVariables[1].Shape(), vInput2, device);

    //bind both inputs; inputValue1 (presumably the image value) is created outside this snippet
    std::unordered_map<CNTK::Variable, CNTK::ValuePtr> inputs;
    inputs[inputVariables[0]] = inputValue1;
    inputs[inputVariables[1]] = inputValue2;

//prepare output buffers
//the three output shapes are read here but not used again within this snippet
NDShape outputShape1 = outputVariables[0].Shape();
NDShape outputShape2 = outputVariables[1].Shape();
NDShape outputShape3 = outputVariables[2].Shape();
//default-constructed (empty) value pointers, one per output variable;
//NOTE(review): presumably Evaluate allocates the storage for empty entries - confirm against the CNTK API docs
CNTK::ValuePtr outputValue1;
CNTK::ValuePtr outputValue2;
CNTK::ValuePtr outputValue3;
//map each of the first three output variables to its (still empty) value pointer
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> outputs = { { outputVariables[0], outputValue1 },{ outputVariables[1], outputValue2 },{ outputVariables[2], outputValue3 } };

//Evaluate the network
//runs the model on the prepared inputs/outputs maps on the given device
try
{
    modelFuncPtr->Evaluate(inputs, outputs, device);
}
//any exception, of any type, is caught here: the code asserts via ATLASSERT(0)
//and reports failure by returning false; the exception details are discarded
catch (...)
{ 
    ATLASSERT(0);
    return false;
}

to add to my post above, once I call evaluate, I extract the output like so:

// One buffer per model output; each is filled by the matching
// CopyVariableValueTo call below.
std::vector< std::vector<float> > vClsPred;
std::vector< std::vector<float> > vROIS;
std::vector< std::vector<float> > vbboxRegr;
// Each output Value is unpacked using the same output variable it was stored under.
// `outputs` and `outputVariables` are defined outside this snippet.
outputs[outputVariables[0]]->CopyVariableValueTo(outputVariables[0], vClsPred);//cls_pred - the class probabilities for each ROI
outputs[outputVariables[1]]->CopyVariableValueTo(outputVariables[1], vROIS);//rpn_rois - the absolute pixel coordinates of the candidate rois
outputs[outputVariables[2]]->CopyVariableValueTo(outputVariables[2], vbboxRegr);//bbox_regr - the regression coefficients per class for each ROI

It works fine. Thank you...
You are my god.
Thanks thanks TT

After extracting the output vectors above, there is an additional step needed to get the actual rectangles on the image. You can see this in the Python scripts in the CNTK example. I found C++ code that does this here and modified it to get the code below. Note that the 'BBox' and 'Point4f' classes can be found in that project also. The BBox class has an 'id' member that is the class category, a 'confidence' parameter and the 4 values for x1, y1, x2, and y2 for the actual rectangle. I don't really understand exactly what it is doing, but the code combines the three output vectors to provide the single output we are after. It seems to work alright :-).

// Number of boxes implied by the regression output: its length divided by
// cls_num and by the 4 values stored per box and class.
long nSize = vbboxRegr[0].size() / cls_num / 4;
box_num = 300;
//not sure about this
// Cap box_num at nSize so no more boxes are requested than the regression data holds.
if (nSize < box_num)
{
    box_num = nSize;
}
// 850, 850 are passed as the width and height arguments of ProcessResults.
ProcessResults(850, 850, cls_num, box_num, vClsPred, vROIS, vbboxRegr, results);
//iterate through the actual rectangles below
// The statements in the loop body only name the members of interest and discard
// the values; they show which fields to read rather than do any work.
for (std::vector<BBox<float> >::iterator i = results.begin(); i != results.end(); ++i)
{
    i->id;//category
    i->confidence;
    i->Point[0];//left
    i->Point[1];//top
    i->Point[2];//right
    i->Point[3];//bottom
}
// Combines the three raw model outputs into final detections.
//
// For every class except class 0 (background) each candidate box is built by
// applying the class-specific regression deltas to its ROI via
// bbox_transform_inv, clipped against the image size, and paired with its
// class score. The boxes of a class are then sorted and filtered with a
// greedy overlap suppression; the survivors are appended to `results`.
//
// width, height - image size used to clip the right/bottom box coordinates.
// cls_num       - number of classes, including background at index 0.
// box_num       - maximum number of candidate boxes to process.
// vClsPred      - element 0 holds cls_num scores per box.
// vROIS         - element 0 holds 4 coordinates per box.
// vbboxRegr     - element 0 holds 4 deltas per box and class.
// results       - output; detections are appended, existing entries are kept.
//
// The number of boxes actually processed is limited to what all three input
// vectors can supply, so short or empty inputs no longer cause out-of-range
// reads; nothing is appended when there is no usable data.
void CFasterRCNNTest::ProcessResults(long width, long height, long cls_num, long box_num, 
    const std::vector< std::vector<float> >& vClsPred, 
    const std::vector< std::vector<float> >& vROIS, 
    const std::vector< std::vector<float> >& vbboxRegr, std::vector<BBox<float> >& results)
{
    // Nothing to do without a foreground class, a positive box count, or data.
    if (cls_num <= 1 || box_num <= 0 || vClsPred.empty() || vROIS.empty() || vbboxRegr.empty()) {
        return;
    }

    const std::vector<float>& scores = vClsPred[0];
    const std::vector<float>& rois = vROIS[0];
    const std::vector<float>& deltas = vbboxRegr[0];

    // Never index past the end of any of the three buffers.
    const std::size_t num_classes = static_cast<std::size_t>(cls_num);
    std::size_t num_boxes = static_cast<std::size_t>(box_num);
    num_boxes = std::min(num_boxes, scores.size() / num_classes);
    num_boxes = std::min(num_boxes, rois.size() / 4);
    num_boxes = std::min(num_boxes, deltas.size() / (num_classes * 4));

    const float test_score_thresh = 0.4f; // minimum confidence for a kept box
    const float test_nms = 0.4f;          // overlap above which a lower-ranked box is dropped

    for (int cls = 1; cls < cls_num; cls++) {//start at 1 to avoid background class
        const std::size_t cls_index = static_cast<std::size_t>(cls);
        std::vector<BBox<float> > bbox;
        bbox.reserve(num_boxes);
        for (std::size_t i = 0; i < num_boxes; i++) {
            const float score = scores[i * num_classes + cls_index];

            const std::size_t roi_at = i * 4;
            Point4f<float> roi(rois[roi_at + 0],
                rois[roi_at + 1],
                rois[roi_at + 2],
                rois[roi_at + 3]);

            const std::size_t delta_at = (i * num_classes + cls_index) * 4;
            Point4f<float> delta(deltas[delta_at + 0],
                deltas[delta_at + 1],
                deltas[delta_at + 2],
                deltas[delta_at + 3]);

            // Clip the left/top edge at 0 and the right/bottom edge at the last pixel.
            Point4f<float> box = bbox_transform_inv(roi, delta);
            box[0] = std::max(0.0f, box[0]);
            box[1] = std::max(0.0f, box[1]);
            box[2] = std::min(width - 1.f, box[2]);
            box[3] = std::min(height - 1.f, box[3]);

            bbox.push_back(BBox<float>(box, score, cls));
        }

        // NOTE(review): the early `break` below only makes sense if BBox's
        // operator< sorts by descending confidence — confirm in the BBox class.
        std::sort(bbox.begin(), bbox.end());

        // Greedy suppression: keep a box, then discard every later box that
        // overlaps it by more than test_nms.
        std::vector<bool> select(num_boxes, true);
        for (std::size_t i = 0; i < num_boxes; i++)
        {
            if (!select[i]) continue;
            if (bbox[i].confidence < test_score_thresh) break;
            for (std::size_t j = i + 1; j < num_boxes; j++)
            {
                if (select[j] && get_iou(bbox[i], bbox[j]) > test_nms) {
                    select[j] = false;
                }
            }
            results.push_back(bbox[i]);
        }
    }
}
Was this page helpful?
0 / 5 - 0 ratings

Related issues

SudharakaP picture SudharakaP  ·  5 Comments

colino picture colino  ·  4 Comments

Prasandhmcw picture Prasandhmcw  ·  5 Comments

pallashadow picture pallashadow  ·  5 Comments

christopher5106 picture christopher5106  ·  5 Comments