Custom plugin parameters are incorrect when the number of parameters is more than 10.
# Create "customPluginA" from twelve PluginField entries that are all built
# inline inside the PluginFieldCollection(...) call, then add the plugin to
# the network with four inputs.
# NOTE(review): every numpy array and PluginField here is a temporary that is
# not bound to any Python name; later comments in this thread report correct
# values only when the field list is first stored in a variable, so this looks
# like an object-lifetime problem -- confirm.
customPlugin = plugin_creator.create_plugin("customPluginA",
trt.PluginFieldCollection([
trt.PluginField("img_size",
np.array([768], dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("variances",
np.ascontiguousarray(
[0.1, 0.1, 0.05, 0.2, 0.2, 0.2],
dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("size_threshold",
np.array([15000], dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("out_size",
np.array([48], dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("threshold",
np.array([0.01], dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("high_size_count",
np.array([3], dtype=np.int32),
trt.PluginFieldType.INT32),
trt.PluginField("low_size_count",
np.array([8], dtype=np.int32),
trt.PluginFieldType.INT32),
trt.PluginField("nms_topk",
np.array([-1], dtype=np.int32),
trt.PluginFieldType.INT32),
trt.PluginField("clip", np.array([1], dtype=np.int8),
trt.PluginFieldType.INT8),
trt.PluginField("force_suppress",
np.array([1], dtype=np.int8),
trt.PluginFieldType.INT8),
trt.PluginField("nms_threshold",
np.array([0.00001], dtype=np.float32),
trt.PluginFieldType.FLOAT32),
trt.PluginField("center_bias",
np.array([0.4], dtype=np.float32),
trt.PluginFieldType.FLOAT32)
]))
# The plugin layer takes the four tensors data, cls_prob, loc_pred and anchors.
out = network.add_plugin_v2([data, cls_prob, loc_pred, anchors], customPlugin)
img_size: 6.01021e-37
variances: 0.1 0.1 0.05 0.2 0.2 0.2
size_threshold: 6.00981e-37
out_size: 48
threshold: 0.01
high_size_cout: 3
low_size_count: 8
nms_topk: -1
clip: 1
force_suppress: 1
nms_threshold: 1e-05
center_bias: 0.4
The values of the first and third parameters are incorrect, and if I add one more parameter, the fourth value becomes incorrect as well.
However, if there are fewer than 10 parameters, all values are correct.
Does anyone have a clue?
TensorRT Version: TensorRT-6.0.1.5-cuda10.0
GPU Type: GeForce GTX 1080
Nvidia Driver Version: 430.26
CUDA Version: cuda10.0
CUDNN Version: 7.6
Operating System + Version: Ubuntu 18.04
Python Version (if applicable): python 3.6
TensorFlow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if container which image + tag):
Hi @Mengman,
Can you provide a minimal, but complete example that I can copy+paste+run to reproduce this?
@rmccorm4
here is my code
customPlugin.h
#ifndef CUSTOM_PLUGIN_H
#define CUSTOM_PLUGIN_H
#include "NvInferPlugin.h"
#include <vector>
#include <string.h>
#include <iostream>
using namespace nvinfer1;
using namespace std;
// Minimal IPluginV2 implementation used to reproduce the issue.
// The only state it carries is the layer name and the plugin namespace.
class CustomPlugin : public IPluginV2
{
public:
    // --- construction: a layer name is mandatory ---
    CustomPlugin() = delete;
    CustomPlugin(const string name);

    // --- output description ---
    int getNbOutputs() const override;
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;

    // --- lifecycle and execution ---
    int initialize() override;
    void terminate() override;
    size_t getWorkspaceSize(int batchSize) const override;
    int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace,
                cudaStream_t stream) override;

    // --- serialization ---
    size_t getSerializationSize() const override;
    void serialize(void* buffer) const override;

    // --- format negotiation ---
    void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims,
                             int nbOutputs, DataType type, PluginFormat format,
                             int maxBatchSize) override;
    bool supportsFormat(DataType type, PluginFormat format) const override;

    // --- identity, ownership and namespace ---
    const char* getPluginType() const override;
    const char* getPluginVersion() const override;
    void destroy() override;
    IPluginV2* clone() const override;
    void setPluginNamespace(const char* pluginNamespace) override;
    const char* getPluginNamespace() const override;

private:
    const string mLayerName; // name given at construction
    string mNamespace;       // set through setPluginNamespace()
};
// Factory for CustomPlugin. The field metadata (mFC / mPluginAttributes) is
// static, i.e. shared by every creator instance.
class CustomPluginCreator : public IPluginCreator
{
public:
    CustomPluginCreator();

    // --- identity and advertised fields ---
    const char* getPluginName() const override;
    const char* getPluginVersion() const override;
    const PluginFieldCollection* getFieldNames() override;

    // --- plugin construction ---
    IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override;
    IPluginV2* deserializePlugin(const char* name, const void* serialData,
                                 size_t serialLength) override;

    // --- namespace ---
    void setPluginNamespace(const char* pluginNamespace) override;
    const char* getPluginNamespace() const override;

private:
    static PluginFieldCollection mFC;             // view over mPluginAttributes
    static vector<PluginField> mPluginAttributes; // storage for the field metadata
    string mNamespace;
};
#endif
customPlugin.cpp
#include "NvInfer.h"
#include "customPlugin.h"
#include <vector>
#include <string.h>
#include <iostream>
using namespace nvinfer1;
using namespace std;
namespace
{
// Type name and version string returned by both the plugin and its creator.
static const char *CUSTOM_PLUGIN_NAME = "CustomPlugin";
static const char *CUSTOM_PLUGIN_VERSION = "1";
} // namespace

// Out-of-class definitions of the creator's static members; both start empty
// and are filled in by the CustomPluginCreator constructor.
PluginFieldCollection CustomPluginCreator::mFC{};
vector<PluginField> CustomPluginCreator::mPluginAttributes{};
// Stores the layer name; mNamespace is left default-constructed (empty).
CustomPlugin::CustomPlugin(const string name)
    : mLayerName(name)
{
}
// Reports that the plugin has exactly one output.
int CustomPlugin::getNbOutputs() const
{
    const int outputCount = 1;
    return outputCount;
}
// Returns the dimensions of the plugin's output.
//
// The original returned a default-declared `Dims`, whose members are
// uninitialised, so the builder received indeterminate dimensions. This stub
// has no real computation, so the output now mirrors the first input; when no
// input is supplied a zero-initialised Dims is returned instead.
//
// index        output index (unused: there is a single output)
// inputs       array of input dimensions, may be null
// nbInputDims  number of entries in `inputs`
Dims CustomPlugin::getOutputDimensions(int index, const Dims *inputs, int nbInputDims)
{
    (void) index;
    Dims output{}; // value-initialised: every member is zero, never garbage
    if (inputs != nullptr && nbInputDims > 0)
    {
        output = inputs[0];
    }
    return output;
}
// Nothing to set up; always returns 0.
int CustomPlugin::initialize()
{
    const int status = 0;
    return status;
}
// Nothing to release: initialize() acquires no resources.
void CustomPlugin::terminate()
{
}
// Stub: performs no work on the inputs/outputs and always returns -1.
int CustomPlugin::enqueue(int batchSize, const void *const *inputs, void **outputs,
                          void *workspace, cudaStream_t stream)
{
    const int status = -1;
    return status;
}
// The plugin requests no scratch memory, whatever the batch size.
size_t CustomPlugin::getWorkspaceSize(int batchSize) const
{
    const size_t workspaceBytes = 0;
    return workspaceBytes;
}
// Intentionally empty: the plugin keeps no per-configuration state.
void CustomPlugin::configureWithFormat(const Dims *inputs, int nbInputs, const Dims *outputs,
                                       int nbOutputs, DataType type, PluginFormat format,
                                       int maxBatchSize)
{
}
// Accepts every data type / format combination it is asked about.
bool CustomPlugin::supportsFormat(DataType type, PluginFormat format) const
{
    const bool supported = true;
    return supported;
}
// Returns the number of bytes serialize() writes.
//
// serialize() in this file writes nothing, so the correct size is 0. The
// original returned sizeof(mLayerName), which is the size of the std::string
// object itself (not of the name) and advertised bytes that were never
// written, leaving that part of the serialized buffer uninitialised.
size_t CustomPlugin::getSerializationSize() const
{
    return 0;
}
// Writes nothing into `buffer`: the plugin has no state that it serializes.
void CustomPlugin::serialize(void *buffer) const
{
}
// Self-deletes; the object must not be used after this call.
void CustomPlugin::destroy()
{
    delete this;
}
// Copies the given C string into mNamespace.
void CustomPlugin::setPluginNamespace(const char *libNamespace)
{
    mNamespace.assign(libNamespace);
}
// Returns a pointer into mNamespace; it stays valid until the namespace is
// changed or the plugin is destroyed.
const char *CustomPlugin::getPluginNamespace() const
{
    const char *ns = mNamespace.c_str();
    return ns;
}
// Returns a heap-allocated copy of this plugin; the caller releases it via
// destroy().
//
// The original copied only the layer name, so the clone silently lost the
// plugin namespace. Both pieces of state are now carried over.
IPluginV2 *CustomPlugin::clone() const
{
    CustomPlugin *copy = new CustomPlugin(mLayerName);
    copy->setPluginNamespace(mNamespace.c_str());
    return copy;
}
// Type name of the plugin; the same constant the creator returns from
// getPluginName().
const char *CustomPlugin::getPluginType() const
{
    const char *typeName = CUSTOM_PLUGIN_NAME;
    return typeName;
}
// Version string of the plugin; the same constant the creator returns.
const char *CustomPlugin::getPluginVersion() const
{
    const char *version = CUSTOM_PLUGIN_VERSION;
    return version;
}
// Populates the static field metadata with eleven INT32 fields, var1..var11,
// each of length 1 and with no data attached.
//
// mPluginAttributes is static, so the original appended another eleven entries
// every time a creator was constructed. The vector is now cleared first, which
// keeps the metadata at eleven entries no matter how many creators exist.
CustomPluginCreator::CustomPluginCreator()
{
    // String literals have static storage, so the name pointers kept inside
    // each PluginField remain valid for the lifetime of the program.
    static const char *const kFieldNames[] = {"var1", "var2", "var3", "var4",  "var5", "var6",
                                              "var7", "var8", "var9", "var10", "var11"};

    mPluginAttributes.clear();
    for (const char *fieldName : kFieldNames)
    {
        mPluginAttributes.emplace_back(PluginField(fieldName, nullptr, PluginFieldType::kINT32, 1));
    }

    // Fill PluginFieldCollection with PluginField arguments metadata
    mFC.nbFields = mPluginAttributes.size();
    cout << "Plugin attribute number: " << mPluginAttributes.size() << endl;
    // Taken only after the vector is fully populated, so no later reallocation
    // in this constructor can invalidate the pointer.
    mFC.fields = mPluginAttributes.data();
}
// Name of the plugin this creator builds; matches CustomPlugin::getPluginType().
const char *CustomPluginCreator::getPluginName() const
{
    const char *pluginName = CUSTOM_PLUGIN_NAME;
    return pluginName;
}
// Version of the plugin this creator builds.
const char *CustomPluginCreator::getPluginVersion() const
{
    const char *version = CUSTOM_PLUGIN_VERSION;
    return version;
}
// Exposes the static field metadata filled in by the constructor.
const PluginFieldCollection *CustomPluginCreator::getFieldNames()
{
    const PluginFieldCollection *fieldMetadata = &mFC;
    return fieldMetadata;
}
// Creates a CustomPlugin named `name` and, as a debugging aid, prints every
// field received in `fc` as "<field name>: <value>".
//
// Each value is read as a single int regardless of the field's declared type,
// which is what the repro script sends. Compared with the original:
//   * `fc`, its field array and each field's data pointer are checked before
//     being dereferenced;
//   * the loop-local no longer shadows the `name` parameter.
//
// name  layer name forwarded to the CustomPlugin constructor
// fc    fields supplied by the caller, may be null
// Returns a heap-allocated plugin.
IPluginV2 *CustomPluginCreator::createPlugin(const char *name, const PluginFieldCollection *fc)
{
    if (fc != nullptr && fc->fields != nullptr)
    {
        const PluginField *fields = fc->fields;
        for (int i = 0; i < fc->nbFields; i++)
        {
            const char *fieldName = fields[i].name;
            const void *fieldData = fields[i].data;
            if (fieldData == nullptr)
            {
                cout << fieldName << ": <no data>\n";
                continue;
            }
            cout << fieldName << ": " << *static_cast<const int *>(fieldData) << "\n";
        }
    }
    return new CustomPlugin(name);
}
// Builds a fresh plugin from the name alone; the serialized bytes are ignored.
IPluginV2 *CustomPluginCreator::deserializePlugin(const char *name, const void *serialData,
                                                  size_t serialLength)
{
    IPluginV2 *plugin = new CustomPlugin(name);
    return plugin;
}
// Copies the given C string into the creator's mNamespace.
void CustomPluginCreator::setPluginNamespace(const char *pluginNamespace)
{
    mNamespace.assign(pluginNamespace);
}
// Returns a pointer into mNamespace; valid until the namespace changes or the
// creator is destroyed.
const char *CustomPluginCreator::getPluginNamespace() const
{
    const char *ns = mNamespace.c_str();
    return ns;
}
// TensorRT-provided macro; presumably registers CustomPluginCreator with the
// plugin registry when this library is loaded (the Python script below finds
// the creator through that registry) -- confirm against NvInferRuntimeCommon.h.
REGISTER_TENSORRT_PLUGIN(CustomPluginCreator);
custom_plugin_test.py
import ctypes
import numpy as np
import tensorrt as trt
# Logger shared by the plugin-registry initialisation and the builder below.
logger = trt.Logger(trt.Logger.WARNING)
# Load the shared library built from customPlugin.cpp into this process.
ctypes.cdll.LoadLibrary('./plugin/CustomPlugin.so')
def get_plugin_creator(plugin_name):
    """Return the registered plugin creator called ``plugin_name``.

    Initialises the TensorRT plugin library first, then scans the registry.
    If several creators share the name, the last one in the registry list is
    returned; if none matches, the result is ``None``.
    """
    trt.init_libnvinfer_plugins(logger, '')
    registry_creators = trt.get_plugin_registry().plugin_creator_list
    matching = [candidate for candidate in registry_creators
                if candidate.name == plugin_name]
    return matching[-1] if matching else None
# Repro driver: look up the creator, build a one-input network and create the
# plugin from ten INT32 fields (var1..var10 holding 1..10).
if __name__ == '__main__':
plugin_creator = get_plugin_creator('CustomPlugin')
if plugin_creator is None:
print("Plugin CustomPlugin not found.")
exit()
builder = trt.Builder(logger)
network = builder.create_network()
builder.max_batch_size = 2
builder.max_workspace_size = 1 << 28
data_shape = (3, 64, 64)
data = network.add_input("data", trt.DataType.FLOAT, data_shape)
# All PluginField objects and their numpy arrays are temporaries created inside
# this single expression; this is the form that prints wrong values in the
# output listed after the makefile.
# NOTE(review): presumably the temporaries are freed before the C++ side reads
# them -- confirm.
customPlugin = plugin_creator.create_plugin("CustomPlugin", trt.PluginFieldCollection([
trt.PluginField("var1", np.array([1], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var2", np.array([2], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var3", np.array([3], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var4", np.array([4], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var5", np.array([5], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var6", np.array([6], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var7", np.array([7], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var8", np.array([8], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var9", np.array([9], dtype=np.int32), trt.PluginFieldType.INT32),
trt.PluginField("var10", np.array([10], dtype=np.int32), trt.PluginFieldType.INT32)
]))
# Attach the plugin to the network, mark its first output and build the engine.
out = network.add_plugin_v2([data], customPlugin)
network.mark_output(out.get_output(0))
engine = builder.build_cuda_engine(network)
context = engine.create_execution_context()
makefile
# Builds CustomPlugin.so from customPlugin.cpp against CUDA 10.0 and
# TensorRT 6.0.1.5. Recipe lines must start with a TAB character.

# Toolchain
CUDA_PATH = /usr/local/cuda-10.0
GCC = g++
NVCC = $(CUDA_PATH)/bin/nvcc

# Compile and link flags
CCFLAGS = -g -std=c++11 -DNDEBUG
INCLUDES += -I. -I$(CUDA_PATH)/include -I/usr/local/TensorRT-6.0.1.5-cuda10.0/include
LDFLAGS := -L/usr/local/cuda-10.0/lib64 -L/usr/local/TensorRT-6.0.1.5-cuda10.0/lib $(LDFLAGS)
LDFLAGS += -lnvinfer -lcudart

# Output library and the build products found on disk (used by `clean` and by
# the dependency include below)
SO = CustomPlugin.so
OBJ = $(shell find . -name '*.o')
D = $(shell find . -name '*.d')
DEP = $(OBJ:.o=.d)

# `all` and `clean` name actions, not files; without this a stray file called
# "all" or "clean" would stop the rule from running.
.PHONY: all clean

all: $(SO) $(CUDA_BIN)

CustomPlugin.so: customPlugin.o

# Pull in compiler-generated header dependencies; the leading '-' ignores
# files that do not exist yet.
-include $(DEP)

clean:
	rm -rf $(SO) $(CUDA_BIN) $(OBJ) $(D)

# C++ sources: -MD -MP also writes a .d dependency file next to the object.
%.o: %.cpp
	$(GCC) $(CCFLAGS) -fPIC -MD -MP $(INCLUDES) -o $@ -c $<

# CUDA sources: first emit the .d dependency file, then compile.
%.o: %.cu
	$(NVCC) $(CCFLAGS) -M -MT $@ $(INCLUDES) -o $(@:.o=.d) $<
	$(NVCC) $(CCFLAGS) $(INCLUDES) -Xcompiler -fPIC -arch=sm_30 -o $@ -c $<

# Link every prerequisite ($+) into the shared library.
$(SO):
	$(GCC) $(CCFLAGS) -shared -o $@ $+ $(LDFLAGS)
# output
var1: 35277840
var2: 35295520
var3: 35294608
var4: 4
var5: 5
var6: 6
var7: 7
var8: 8
var9: 9
var10: 10
Hi @Mengman,
Does this issue still occur in TensorRT 7?
@rmccorm4
I have the same issue with TRT 7 too.
My custom layer is modified version of embLayerNormPlugin and has 10 inputs, 2 outputs, and 7 more embedding matrices.
// embLayerNormPlugin.cu => create_plugin function
if (field_name.compare("bert_embeddings_layernorm_gamma") == 0)
{
gLogVerbose << "Building bert_embeddings_layernorm_gamma...\n";
gamma.values = fc->fields[i].data;
gamma.count = fc->fields[i].length;
gamma.type = fieldTypeToDataType(fc->fields[i].type);
float* host_mem = new float[gamma.count *sizeof(float)];
memcpy(host_mem, fc->fields[i].data, fc->fields[i].length * sizeof(float) );
float debug = 0;
for (int i = 0; i < fc->fields[i].length; i++) {
debug += host_mem[i];
}
std::cout << "gamma length " << fc->fields[i].length << " " << debug << std::endl;
}
word -6565.2744
pos -490.8802
token -44.405117
pos_x -223.92572
pos xtl -223.92572
pos xbr -223.92572
pos_y -83.62639
pos y tl -83.62639
pos y br -83.62639
line -821.17566
gamma 5.6698227
beta 14.203701
beta length 768 -3.74458
gamma length 768 2.63594
word length 91812096 0.82379
pos length 393216 1.87292
tok length 1536 0.831615
pos x length 23040 -5.90863
pos xtl length 23040 -5.90863
pos xbr length 23040 -5.90863
pos y length 23040 5.03566
pos y tl length 23040 5.03566
pos y br length 23040 5.03566
line emb length 393216 -3.0207
@Mengman Did you solve your problem? If so, could you please share some tips?
I've been struggling with this for a week.
@dhkim0225 No, I didn't solve it.
@rmccorm4 I update to TensorRT-7.0.0.11-cuda10.0 this problem still exists
@Mengman @dhkim0225 looking into it now
Here's an interesting observation...
Initializing the PluginFieldCollection all in one step like in your example,
# One-step form: the list of eleven PluginField objects (var1..var11) exists
# only as an argument of the PluginFieldCollection(...) call.
plugin_field_collection = trt.PluginFieldCollection(
[trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)]
)
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection)
I see the bad behavior:
Plugin attribute number: 11
var1: 238160944
var2: 28542944
var3: 25671968
var4: 25672304
var5: 5
var6: 6
var7: 7
var8: 8
var9: 9
var10: 10
var11: 11
However, if you first create the list of plugin_fields, and then separately create a PluginFieldCollection from this list:
# Two-step form: the list of PluginField objects is bound to `plugin_fields`,
# so it is still referenced when create_plugin() runs.
plugin_fields = [trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)]
plugin_field_collection = trt.PluginFieldCollection(plugin_fields)
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection)
The output seems fine:
Plugin attribute number: 11
var1: 1
var2: 2
var3: 3
var4: 4
var5: 5
var6: 6
var7: 7
var8: 8
var9: 9
var10: 10
var11: 11
Hm, but if I separate that logic out to functions, I still get some issues:
# custom_plugin_test.py
import ctypes
import numpy as np
import tensorrt as trt
# Logger shared by the plugin-registry initialisation and the builder below.
logger = trt.Logger(trt.Logger.WARNING)
# Load the shared library containing the custom plugin into this process.
ctypes.cdll.LoadLibrary('./CustomPlugin.so')
def get_plugin_creator(plugin_name):
    """Return the registered plugin creator called ``plugin_name``.

    Initialises the TensorRT plugin library first, then scans the registry.
    If several creators share the name, the last one in the registry list is
    returned; if none matches, the result is ``None``.
    """
    trt.init_libnvinfer_plugins(logger, '')
    registry_creators = trt.get_plugin_registry().plugin_creator_list
    matching = [candidate for candidate in registry_creators
                if candidate.name == plugin_name]
    return matching[-1] if matching else None
# Builds the collection in one expression: the list of eleven PluginField
# objects (var1..var11) is never bound to a name, and only the collection is
# returned to the caller.
def create_pfc_one_step():
plugin_field_collection = trt.PluginFieldCollection([trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)])
return plugin_field_collection
# Builds the field list first and the collection second. `plugin_fields` is a
# local, so it goes out of scope when the function returns; only the
# collection is handed back.
def create_pfc_two_steps():
plugin_fields = [trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)]
plugin_field_collection = trt.PluginFieldCollection(plugin_fields)
return plugin_field_collection
# Comparison driver: builds four PluginFieldCollections (one-step and two-step,
# each via a helper function and in-line) and passes each to create_plugin()
# so the values printed by the C++ side can be compared.
if __name__ == '__main__':
plugin_creator = get_plugin_creator('CustomPlugin')
if plugin_creator is None:
print("Plugin CustomPlugin not found.")
exit()
builder = trt.Builder(logger)
network = builder.create_network()
builder.max_batch_size = 2
builder.max_workspace_size = 1 << 28
data_shape = (3, 64, 64)
data = network.add_input("data", trt.DataType.FLOAT, data_shape)
# Collections produced by the helper functions.
plugin_field_collection1_func = create_pfc_one_step()
plugin_field_collection2_func = create_pfc_two_steps()
# One-step in-line: the field list is a temporary.
plugin_field_collection1_inline = trt.PluginFieldCollection([trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)])
# Two-step in-line: `plugin_fields` stays bound for the rest of the script;
# this is the only variant that prints correct values in the output below.
plugin_fields = [trt.PluginField("var{}".format(i), np.array([i], dtype=np.int32), trt.PluginFieldType.INT32) for i in range(1, 12)]
plugin_field_collection2_inline = trt.PluginFieldCollection(plugin_fields)
print("-"*30)
print("1 step func")
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection1_func)
print("-"*30)
print("2 step func")
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection2_func)
print("-"*30)
print("1 step in-line")
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection1_inline)
print("-"*30)
print("2 step in-line")
customPlugin = plugin_creator.create_plugin("CustomPlugin", plugin_field_collection2_inline)
Only the 2-step in-line method had correct values:
root@c93e539a3cc0:/mnt# python custom_plugin_test.py
Plugin attribute number: 11
------------------------------
1 step func
nbFields: 11
var1: 1
var2: 1
var3: 36844752
var4: 1
var5: 36904016
var6: 36896873
var7: 9
var8: 8
var9: -1202920192
var10: 29187680
var11: 36904320
------------------------------
2 step func
nbFields: 11
var1: 36904016
var2: 36896873
var3: 9
var4: 8
var5: -1202920192
var6: 29187680
var7: 36904320
var8: 246481001
var9: 1
var10: 2
var11: 3
------------------------------
1 step in-line
nbFields: 11
var1: -1202920192
var2: 29187680
var3: 36904320
var4: 246481001
var5: 1
var6: 2
var7: 3
var8: 4
var9: 5
var10: 6
var11: 7
------------------------------
2 step in-line
nbFields: 11
var1: 1
var2: 2
var3: 3
var4: 4
var5: 5
var6: 6
var7: 7
var8: 8
var9: 9
var10: 10
var11: 11
Not sure how to explain this, seems like it might be an issue with pybind or something.
@dhkim0225 @Mengman do you see this behavior using the C++ API?
@rmccorm4 OMG .. Thanks soooooooo much again !!!!
I didn't test with C++ api.
Glad it helped! If you get a chance to try the C++ API as well, that would be very helpful, but no worries if you can't 🙂
@rmccorm4
Sadly, this doesn't work for my case :(
Values are still strange.
# Thirteen fields for the modified embLayerNorm plugin: twelve FLOAT32 weight
# fields plus the INT32 `output_fp16` flag.
# The *_x_tl / *_x_br fields reuse 'pos_x_embeddings.weight' and the
# *_y_tl / *_y_br fields reuse 'pos_y_embeddings.weight'.
# NOTE(review): each `_flatten4plugin_field(...)` result is passed straight in
# as `data` and is not bound to a name; what that helper returns is not shown
# here -- confirm the returned arrays stay alive until create_plugin() is called.
# NOTE(review): `use_fp16` is passed directly as `data`; its type is not
# visible in this snippet -- confirm.
plugin_fields = [
trt.PluginField(name="bert_embeddings_layernorm_beta",
data=_flatten4plugin_field(w_dict[prefix + 'LayerNorm.beta']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_layernorm_gamma",
data=_flatten4plugin_field(w_dict[prefix+'LayerNorm.gamma']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_word_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'word_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_token_type_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'token_type_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'position_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_x_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_x_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_x_tl_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_x_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_x_br_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_x_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_y_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_y_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_y_tl_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_y_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_position_y_br_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'pos_y_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name="bert_embeddings_line_embeddings",
data=_flatten4plugin_field(w_dict[prefix+'line_embeddings.weight']),
type=trt.PluginFieldType.FLOAT32),
trt.PluginField(name='output_fp16',
data=use_fp16,
type=trt.PluginFieldType.INT32)]
# Two-step construction: the field list above stays bound to `plugin_fields`.
pfc = trt.PluginFieldCollection(plugin_fields)
I printed out each plugin field's data attribute and found something strange (maybe?):
# Print the `data` attribute of every field; the resulting lines are listed below.
for pf in plugin_fields:
print(pf.data)
All of the plugin fields' data addresses are the same.
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
<capsule object NULL at 0x7f9b51d7efc0>
numpy version (1.14.0 ~ 1.18.1) didn't work.
beta length 768 -3.74458
gamma length 768 2.63594
word length 91812096 0.82379
tok length 1536 0.831615
pos length 393216 1.87292
pos x length 23040 -5.90863
pos xtl length 23040 -5.90863
pos xbr length 23040 -5.90863
pos y length 23040 5.03566
pos y tl length 23040 5.03566
pos y br length 23040 5.03566
line emb length 393216 -3.0207
After I changed number of arguments 13 to 9 (remove xtl, xbr, ytl, ybr), this issue still occurs...
Yeah, in the original issue I believe the issue occurs at 7/8 args. Will need to look into underlying issue there.
You can verify that in my script above by changing the 12 in range(1, 12) to different numbers.
@rmccorm4 Sorry..
It was due to my mistake. Everything works fine with the 2-step in-line approach.
Thanks a lot!
What was your mistake? Just curious because it looked fine in your previous comment at a glance.
I'm really ashamed...
I just reused the variable "i" in the inner for loop while debugging.
for (int i = 0; i < fc->nbFields; i++)
{
if (field_name.compare("bert_embeddings_layernorm_gamma") == 0)
{
gLogVerbose << "Building bert_embeddings_layernorm_gamma...\n";
gamma.values = fc->fields[i].data;
gamma.count = fc->fields[i].length;
gamma.type = fieldTypeToDataType(fc->fields[i].type);
float* host_mem = new float[gamma.count *sizeof(float)];
memcpy(host_mem, fc->fields[i].data, fc->fields[i].length * sizeof(float) );
float debug = 0;
for (int i = 0; i < fc->fields[i].length; i++) { // SUCH A STUPID CODE :(
debug += host_mem[i]; //
}
std::cout << "gamma length " << fc->fields[i].length << " " << debug << std::endl;
}
}
Ah, and I also confirmed about this issue after fix my debug code.
Only 2 step inline works fine.
Everyone makes those mistakes once in a while, thanks for sharing 🙂
Closing based on final comment from @dhkim0225
Most helpful comment
Hm, but if I separate that logic out to functions, I still get some issues:
Only the 2-step in-line method had correct values: