I'll answer this myself. Any advice or suggestions for improvement would be appreciated.
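First, since onnxruntime can load an ONNX model from a memory buffer, load the ONNX file, change the dim_param of the first input's first dimension, and create the session from the re-serialized buffer.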
std::ifstream ifs(model_path, std::ios_base::binary);
std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
onnx::ModelProto pb;
pb.ParseFromString(content);
pb.mutable_graph()->mutable_input()->at(0).mutable_type()->mutable_tensor_type()->mutable_shape()->mutable_dim()->at(0).set_dim_param("None");
size_t size = pb.ByteSizeLong();
void* buffer = malloc(size);
pb.SerializeToArray(buffer, size);
session_ = std::make_unique<Ort::Session>(*env, buffer, size, session_options_);
free(buffer);
And here is a batch prediction example.
// [beginI,endI)
template<int YN, int XN>
void RefreshPredict(std::vector<std::array<float, YN>>& predict, const std::vector<std::array<float, XN>>& ins, int beginI, int endI) const {
std::vector<std::array<float, XN>>& nonConstIns = const_cast<std::vector<std::array<float, XN>>&>(ins);
auto what_dims = input_node_dims;
what_dims[0] = endI - beginI;
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, nonConstIns[beginI].data(), XN * what_dims[0], what_dims.data(), what_dims.size());
assert(input_tensor.IsTensor());
std::vector<const char*> output_node_names;
output_node_names.resize(1);
output_node_names[0] = output_name.c_str();
auto output_tensors = session_->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
float* floatarr = output_tensors.front().GetTensorMutableData<float>();
float* pret = predict[beginI].data();
for (int i = 0; i < YN * what_dims[0]; i++) {
pret[i] = floatarr[i];
}
}
Hello - is this a duplicate of https://github.com/microsoft/onnxruntime/issues/2725 (and the referenced issues within the issue thread) ?
Yes, @hariharans29. I'll close it in a few hours. Thanks for the help, @hariharans29.
Thanks for confirming. I'll close it for you.
And here is a batch prediction example.
// [beginI,endI) template<int YN, int XN> void RefreshPredict(std::vector<std::array<float, YN>>& predict, const std::vector<std::array<float, XN>>& ins, int beginI, int endI) const { std::vector<std::array<float, XN>>& nonConstIns = const_cast<std::vector<std::array<float, XN>>&>(ins); auto what_dims = input_node_dims; what_dims[0] = endI - beginI; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, nonConstIns[beginI].data(), XN * what_dims[0], what_dims.data(), what_dims.size()); assert(input_tensor.IsTensor()); std::vector<const char*> output_node_names; output_node_names.resize(1); output_node_names[0] = output_name.c_str(); auto output_tensors = session_->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); float* floatarr = output_tensors.front().GetTensorMutableData<float>(); float* pret = predict[beginI].data(); for (int i = 0; i < YN * what_dims[0]; i++) { pret[i] = floatarr[i]; } }
Hey @bitnick10 this looks rad, do you know if this same workflow would work equivalently with the python API?
Thanks @zeryx. This predict function is well tested in my project, and it runs as fast as Keras's predict while using only a single thread. You can modify this code and write whatever benchmark code you want.
Most helpful comment
And here is a batch prediction example.