I trained a faster_rcnn_eval_VGG16_e2e_native.model in Python.
Now I want to run detection with it from C++. I can load the model successfully with "CNTK::Function::Load".
I wrote the code below by following the Java evaluation example.
#include<algorithm>
#include<iostream>
#include<stdexcept>
#include<string>
#include<unordered_map>
#include<vector>
#include<opencv2/opencv.hpp>
#include"CNTKLibrary.h"
#include"CNTKLibraryInternals.h"
namespace CNTK {
static CNTK::ValuePtr CreateDenseFloat(const CNTK::NDShape& sampleShape, const std::vector<std::vector<float>>& sequences,
const CNTK::DeviceDescriptor& device, bool readOnly = false) {
return CNTK::Value::Create<float>(sampleShape, sequences, device, readOnly);
}
static CNTK::ValuePtr CreateDenseFloat(const CNTK::NDShape& sampleShape, const std::vector<std::vector<float>>& sequences,
const std::vector<bool>& sequenceStartFlags, const CNTK::DeviceDescriptor& device, bool readOnly = false) {
return CNTK::Value::Create<float>(sampleShape, sequences, sequenceStartFlags, device, readOnly);
}
}
/// Loads an image, scales it to fit inside a `width` x `height` box while
/// keeping its aspect ratio, and pads the remainder with a constant color.
///
/// @param path       File to read with cv::imread.
/// @param width      Width of the returned image in pixels (must be > 0).
/// @param height     Height of the returned image in pixels (must be > 0).
/// @param pad_value  Value written to every channel of the padding border.
/// @return Image of exactly `width` x `height` pixels with the scaled input
///         centered in it.
/// @throws std::invalid_argument if `width` or `height` is not positive.
/// @throws std::runtime_error    if the image cannot be read.
cv::Mat LoadResizeAndPad(std::string path, int width, int height, int pad_value = 114) {
    if (width <= 0 || height <= 0) {
        throw std::invalid_argument("LoadResizeAndPad: target width and height must be positive");
    }

    cv::Mat img = cv::imread(path);
    // imread reports failure by returning an empty matrix; without this check
    // the aspect-ratio computation below would divide by zero.
    if (img.empty()) {
        throw std::runtime_error("LoadResizeAndPad: cannot read image '" + path + "'");
    }

    int target_w = width;
    int target_h = height;
    // Compare aspect ratios by cross-multiplication so the scaled image always
    // fits inside the box. For a square target this is the same as testing
    // img.cols > img.rows.
    const long long scaled_cols = static_cast<long long>(img.cols) * height;
    const long long scaled_rows = static_cast<long long>(img.rows) * width;
    if (scaled_cols > scaled_rows) {
        // Width is the limiting side: keep target_w == width, shrink height.
        target_h = std::max(1, static_cast<int>(static_cast<long long>(img.rows) * width / img.cols));
    } else {
        // Height is the limiting side: keep target_h == height, shrink width.
        target_w = std::max(1, static_cast<int>(static_cast<long long>(img.cols) * height / img.rows));
    }
    cv::resize(img, img, cv::Size(target_w, target_h), 0, 0, cv::INTER_NEAREST);

    // Center the scaled image; any odd leftover pixel goes to the bottom/right.
    const int top = (height - target_h) / 2;
    const int left = (width - target_w) / 2;
    const int bottom = height - top - target_h;
    const int right = width - left - target_w;
    cv::copyMakeBorder(img, img, top, bottom, left, right, cv::BORDER_CONSTANT,
                       cv::Scalar(pad_value, pad_value, pad_value));
    return img;
}
//https://github.com/Microsoft/CNTK/blob/release/2.2/Tests/EndToEndTests/EvalClientTests/JavaEvalTest/src/Main.java
//https://github.com/Microsoft/CNTK/blob/master/bindings/common/CNTKValueExtend.i
int main() {
try {
CNTK::DeviceDescriptor device = CNTK::DeviceDescriptor::UseDefaultDevice();
std::wstring model_path = L"../../faster_rcnn_eval_VGG16_e2e_native.model";
CNTK::FunctionPtr model = CNTK::Function::Load(model_path, device);
for (auto&e : model->Arguments()) {
std::wcout << e.AsString() << std::endl;
}
for (auto&e : model->Outputs()) {
std::wcout << e.AsString() << std::endl;
}
CNTK::Variable input_var = model->Arguments()[0];
CNTK::Variable output_var = model->Outputs()[0];
CNTK::NDShape input_shape = input_var.Shape();
int image_width = input_shape.Dimensions()[0];
int image_height = input_shape.Dimensions()[1];
int image_channel = input_shape.Dimensions()[2];
int image_size = input_shape.TotalSize();
cv::Mat img = LoadResizeAndPad("C:/Users/spring/Desktop/FasterRCNN_folder/05/CNTK/PretrainedModels/CNTK-Samples-2-2/Examples/Image/DataSets/MyDataSet/positive/0019.jpg", image_width, image_height);
std::vector<float> float_vec;
for (int c = 0; c < img.channels(); c++) {
for (int h = 0; h < img.rows; h++) {
for (int w = 0; w < img.cols; w++) {
cv::Scalar color = img.at<cv::Vec3b>(h, w);
if (c == 0) {
float_vec.push_back(color[0]);
} else if (c == 1) {
float_vec.push_back(color[1]);
} else {
float_vec.push_back(color[2]);
}
}
}
}
std::vector<std::vector<float>> float_vec2;
float_vec2.push_back(float_vec);
CNTK::ValuePtr input_val = CNTK::CreateDenseFloat(input_shape, float_vec2, device);
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> input_data_map;
input_data_map.insert(std::make_pair(input_var, input_val));
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> output_data_map;
output_data_map.insert(std::make_pair(output_var, nullptr));
model->Evaluate(input_data_map, output_data_map);
std::vector<std::vector<float>> output_buffer;
output_data_map[output_var].get()->CopyVariableValueTo<float>(input_var, output_buffer);
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
}
return 0;
}
But this code fails with an error.

Please, help me.
You are only adding one input, but the model has two: the image, and a second input containing 6 values. I got this from looking at the Python scripts in the CNTK example. You also need to prepare the outputs. The code below is what I have, and it seems to work OK.
//Second Input
std::vector<float> vInput2;
//from the python script
//# dims = pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height
//dims = (self._pad_width, self._pad_height, target_w, target_h, img_width, img_height)
//img_dims - (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
//as an example, the values in comments are for an image of w=752,h=610, padded to the required 850x850
vInput2.push_back(D.m_nPadWidth);//850
vInput2.push_back(D.m_nPadHeight);//850
vInput2.push_back(D.m_nTargetWidth);//850
vInput2.push_back(D.m_nTargetHeight);//689
vInput2.push_back(D.m_nOriginalWidth);//752
vInput2.push_back(D.m_nOriginalHeight);//610
ValuePtr inputValue2 = CNTK::Value::CreateBatch(inputVariables[1].Shape(), vInput2, device);
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> inputs;
inputs[inputVariables[0]] = inputValue1;
inputs[inputVariables[1]] = inputValue2;
//prepare output buffers
NDShape outputShape1 = outputVariables[0].Shape();
NDShape outputShape2 = outputVariables[1].Shape();
NDShape outputShape3 = outputVariables[2].Shape();
CNTK::ValuePtr outputValue1;
CNTK::ValuePtr outputValue2;
CNTK::ValuePtr outputValue3;
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> outputs = { { outputVariables[0], outputValue1 },{ outputVariables[1], outputValue2 },{ outputVariables[2], outputValue3 } };
//Evaluate the network
try
{
modelFuncPtr->Evaluate(inputs, outputs, device);
}
catch (...)
{
ATLASSERT(0);
return false;
}
To add to my post above: once I call Evaluate, I extract the outputs like so:
std::vector< std::vector<float> > vClsPred;
std::vector< std::vector<float> > vROIS;
std::vector< std::vector<float> > vbboxRegr;
outputs[outputVariables[0]]->CopyVariableValueTo(outputVariables[0], vClsPred);//cls_pred - the class probabilities for each ROI
outputs[outputVariables[1]]->CopyVariableValueTo(outputVariables[1], vROIS);//rpn_rois - the absolute pixel coordinates of the candidate rois
outputs[outputVariables[2]]->CopyVariableValueTo(outputVariables[2], vbboxRegr);//bbox_regr - the regression coefficients per class for each ROI
It works fine. Thank you...
You are my god.
Thanks thanks TT
After extracting the output vectors above, there is an additional step needed to get the actual rectangles on the image. You can see this in the Python scripts in the CNTK example. I found C++ code that does this here and modified it to get the code below. Note that the 'BBox' and 'Point4f' classes can also be found in that project. The BBox class has an 'id' member that is the class category, a 'confidence' member, and the 4 values x1, y1, x2, and y2 for the actual rectangle. I don't really understand exactly what it is doing, but the code combines the 3 output vectors to produce the single output we are after. It seems to work all right :-).
//Number of boxes implied by the regression output: 4 values per class per box.
long nSize = vbboxRegr[0].size() / cls_num / 4;
//Start from 300 boxes and lower it when the output holds fewer.
//not sure about this
box_num = 300;
if (box_num > nSize) {
    box_num = nSize;
}
ProcessResults(850, 850, cls_num, box_num, vClsPred, vROIS, vbboxRegr, results);
//Walk over the final rectangles; each line names one field of a result.
for (BBox<float>& detection : results) {
    detection.id;//category
    detection.confidence;
    detection.Point[0];//left
    detection.Point[1];//top
    detection.Point[2];//right
    detection.Point[3];//bottom
}
/// Combines the three network outputs into final detections.
///
/// For every non-background class, each ROI is shifted by its per-class
/// regression deltas, clipped to the image, sorted, and filtered with a score
/// threshold plus greedy overlap suppression. Surviving boxes are appended to
/// `results`.
///
/// @param width      Image width used to clip box[2].
/// @param height     Image height used to clip box[3].
/// @param cls_num    Number of classes including background (class 0).
/// @param box_num    Maximum number of ROIs to process; clamped to what the
///                   three buffers actually contain.
/// @param vClsPred   Scores; element [0] is read as box-major, cls_num per box.
/// @param vROIS      ROI coordinates; element [0] holds 4 values per box.
/// @param vbboxRegr  Regression deltas; element [0] holds cls_num * 4 per box.
/// @param results    Output; detections are appended, existing content is kept.
void CFasterRCNNTest::ProcessResults(long width, long height, long cls_num, long box_num,
const std::vector< std::vector<float> >& vClsPred,
const std::vector< std::vector<float> >& vROIS,
const std::vector< std::vector<float> >& vbboxRegr, std::vector<BBox<float> >& results)
{
    // Nothing to do without a foreground class, boxes, or output data.
    if (cls_num <= 1 || box_num <= 0 || vClsPred.empty() || vROIS.empty() || vbboxRegr.empty()) {
        return;
    }

    const std::vector<float>& scores = vClsPred[0];
    const std::vector<float>& rois = vROIS[0];
    const std::vector<float>& deltas = vbboxRegr[0];

    // Never read past the end of any buffer, whatever box_num the caller passed.
    const std::size_t classes = static_cast<std::size_t>(cls_num);
    const std::size_t available = std::min(std::min(scores.size() / classes, rois.size() / 4),
                                           deltas.size() / (classes * 4));
    const std::size_t count = std::min(static_cast<std::size_t>(box_num), available);
    if (count == 0) {
        return;
    }

    const float test_score_thresh = 0.4f;  // minimum confidence to keep a box
    const float test_nms = 0.4f;           // overlap above which a later box is dropped

    for (int cls = 1; cls < cls_num; cls++) {//start at 1 to avoid background class
        std::vector<BBox<float> > bbox;
        bbox.reserve(count);
        for (std::size_t i = 0; i < count; i++) {
            const std::size_t slot = i * classes + static_cast<std::size_t>(cls);
            const float score = scores[slot];
            Point4f<float> roi(rois[(i * 4) + 0],
                               rois[(i * 4) + 1],
                               rois[(i * 4) + 2],
                               rois[(i * 4) + 3]);
            Point4f<float> delta(deltas[slot * 4 + 0],
                                 deltas[slot * 4 + 1],
                                 deltas[slot * 4 + 2],
                                 deltas[slot * 4 + 3]);
            Point4f<float> box = bbox_transform_inv(roi, delta);
            // Clip the box to the image area.
            box[0] = std::max(0.0f, box[0]);
            box[1] = std::max(0.0f, box[1]);
            box[2] = std::min(width - 1.f, box[2]);
            box[3] = std::min(height - 1.f, box[3]);
            bbox.push_back(BBox<float>(box, score, cls));
        }

        // Order is defined by BBox's operator<.
        // NOTE(review): the early break below presumably relies on that
        // operator putting the highest confidence first - confirm in BBox.
        std::sort(bbox.begin(), bbox.end());

        std::vector<char> select(count, 1);
        for (std::size_t i = 0; i < count; i++) {
            if (!select[i]) {
                continue;
            }
            if (bbox[i].confidence < test_score_thresh) break;
            // Drop every later box that overlaps the kept one too much.
            for (std::size_t j = i + 1; j < count; j++) {
                if (select[j] && get_iou(bbox[i], bbox[j]) > test_nms) {
                    select[j] = 0;
                }
            }
            results.push_back(bbox[i]);
        }
    }
}
Most helpful comment
to add to my post above, once I call evaluate, I extract the output like so: