pointnet推理部署--onnxruntime框架
onnxruntime
microsoft/onnxruntime: 是一个用于运行各种机器学习模型的开源库。适合对机器学习和深度学习有兴趣的人,特别是在开发和部署机器学习模型时需要处理各种不同框架和算子的人。特点是支持多种机器学习框架和算子,包括 TensorFlow、PyTorch、Caffe 等,具有高性能和广泛的兼容性。
项目地址:https://gitcode.com/gh_mirrors/on/onnxruntime
免费下载资源
·
classification
以分10类,gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
import torch
import pointnet_cls
# Export the trained PointNet classifier (./cls.pth) to ONNX (./cls.onnx).
point_num = 1024
class_num = 10
normal_channel = False

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script runs unchanged on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_cls.get_model(class_num, normal_channel)
# map_location lets a checkpoint that was saved on a GPU load on any device.
checkpoint = torch.load('./cls.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Dummy input used only for tracing: one cloud of point_num points with
# 6 channels when normals are used, 3 otherwise.
channels = 6 if normal_channel else 3
x = torch.rand(1, channels, point_num, device=device)

export_onnx_file = "./cls.onnx"
torch.onnx.export(model, x, export_onnx_file, opset_version=11)
python推理:
import numpy as np
import onnxruntime
# Number of points taken from the input file and fed to the network.
point_num = 1024
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: 2-D array with one point per row (callers pass the xyz columns).

    Returns:
        A new array of the same shape; the input is not modified. If every
        point coincides with the centroid the centered (all-zero) cloud is
        returned as is, instead of dividing by zero.
    """
    centered = pc - np.mean(pc, axis=0)
    # Largest distance of any point from the centroid.
    radius = np.max(np.sqrt(np.sum(centered ** 2, axis=1)))
    if radius == 0:
        return centered
    return centered / radius
if __name__ == '__main__':
    # Read the comma-separated cloud and keep the xyz columns of the first
    # point_num rows.
    cloud = np.loadtxt('./bed_0610.txt', delimiter=',').astype(np.float32)
    xyz = cloud[:point_num, 0:3]

    # Normalize, then lay the data out as (1, 3, point_num) for the network.
    net_input = pc_normalize(xyz).reshape(1, point_num, 3).swapaxes(2, 1)

    session = onnxruntime.InferenceSession(
        "cls.onnx",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    input_names = [node.name for node in session.get_inputs()]
    # Collected for reference only; run(None, ...) below requests every output.
    output_names = [node.name for node in session.get_outputs()]

    feed = {name: net_input for name in input_names}
    scores = session.run(None, feed)[0]
    # Index of the highest-scoring class.
    print(np.argmax(scores))
C++推理:
#include <iostream>
#include <vector>
#include <fstream>
#include <onnxruntime_cxx_api.h>
// Number of points read from the input file and fed to the network
// (the input tensor is built as 1 x 3 x point_num).
const int point_num = 1024;
// Number of object classes in this example.
const int class_num = 10;
// Centers an interleaved xyz point cloud on its centroid and scales it so the
// farthest point lies on the unit sphere. Operates in place.
//
// points: x0, y0, z0, x1, y1, z1, ... ; every complete xyz triple in the
//         vector is processed. An empty vector is left untouched, and a cloud
//         whose points all coincide is only centered (no division by zero).
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return;

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= count;
    mean_y /= count;
    mean_z /= count;

    // Center the cloud and track the largest squared distance from the
    // centroid; the square root is taken once, after the loop.
    double max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] -= mean_x;
        points[3 * i + 1] -= mean_y;
        points[3 * i + 2] -= mean_z;

        const double px = points[3 * i];
        const double py = points[3 * i + 1];
        const double pz = points[3 * i + 2];
        const double sq = px * px + py * py + pz * pz;
        if (sq > max_sq)
            max_sq = sq;
    }

    if (max_sq == 0)
        return;

    const float m = static_cast<float>(sqrt(max_sq));
    for (size_t i = 0; i < 3 * count; ++i)
    {
        points[i] /= m;
    }
}
void classfier(std::vector<float> & points)
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "cls");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"cls.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
std::vector<const char*> input_node_names;
for (size_t i = 0; i < session.GetInputCount(); i++)
{
input_node_names.push_back(session.GetInputName(i, allocator));
}
std::vector<const char*> output_node_names;
for (size_t i = 0; i < session.GetOutputCount(); i++)
{
output_node_names.push_back(session.GetOutputName(i, allocator));
}
const size_t input_tensor_size = 1 * 3 * point_num ;
std::vector<float> input_tensor_values(input_tensor_size);
for (size_t i = 0; i < 3; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values[point_num * i + j] = points[3 * j + i];
}
}
std::vector<int64_t> input_node_dims = { 1, 3, point_num };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> inputs;
inputs.push_back(std::move(input_tensor));
std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = outputs[0].GetTensorData<float>();
std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> pred(rawOutput, rawOutput + count);
int predict_label = std::max_element(pred.begin(), pred.end()) - pred.begin();
std::cout << predict_label << std::endl;
}
// Reads point_num comma-separated "x,y,z,nx,ny,nz" records from bed_0610.txt,
// normalizes the xyz coordinates and classifies the cloud.
// Returns 1 if the file cannot be opened or holds fewer than point_num
// well-formed records, 0 otherwise.
int main()
{
    std::ifstream infile("bed_0610.txt");
    if (!infile)
    {
        std::cerr << "cannot open bed_0610.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    points.reserve(3 * point_num);
    float x, y, z, nx, ny, nz;
    char ch; // swallows the comma between fields
    for (size_t i = 0; i < point_num; i++)
    {
        // Stop instead of pushing stale or uninitialized values when a record
        // is missing or malformed.
        if (!(infile >> x >> ch >> y >> ch >> z >> ch >> nx >> ch >> ny >> ch >> nz))
        {
            std::cerr << "bed_0610.txt: expected " << point_num << " points, read " << i << std::endl;
            return 1;
        }
        // Only the coordinates are used; the normals are parsed and dropped.
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();

    pc_normalize(points);
    classfier(points);
    return 0;
}
part segmentation
以分16类50部分,gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
import torch
import pointnet_part_seg
point_num = 2048  # points in the dummy cloud used to trace the model
class_num = 16  # length of the one-hot object-category vector
part_num = 50  # passed to get_model as its first argument
normal_channel = False  # True traces with a 6-channel dummy input instead of 3
def to_categorical(y, class_num):
    """One-hot encode an integer label tensor.

    Args:
        y: integer tensor of class indices, any shape.
        class_num: number of classes (length of each one-hot vector).

    Returns:
        A float tensor of shape ``y.shape + (class_num,)`` on the same device
        as ``y``.
    """
    # Build the identity on y's device and index it there, so there is no
    # round trip through host memory and the result follows y's device.
    return torch.eye(class_num, device=y.device)[y.long()]
# Export the trained part-segmentation model (./part_seg.pth) to ONNX.

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script runs unchanged on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_part_seg.get_model(part_num, normal_channel)
# map_location lets a checkpoint that was saved on a GPU load on any device.
checkpoint = torch.load('./part_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Dummy inputs used only for tracing: one cloud of point_num points and the
# one-hot vector of object category 0.
channels = 6 if normal_channel else 3
x = torch.rand(1, channels, point_num, device=device)
label = torch.zeros((1, 1), dtype=torch.long, device=device)

export_onnx_file = "./part_seg.onnx"
torch.onnx.export(model,
                  (x, to_categorical(label, class_num)),
                  export_onnx_file,
                  opset_version=11)
python推理:
import numpy as np
import onnxruntime
point_num = 2048  # points sampled from the input cloud and fed to the network
class_num = 16  # length of the one-hot object-category vector
def to_categorical(y, class_num):
    """Return the float32 one-hot encoding of the integer label(s) ``y``.

    Each label selects the matching row of a ``class_num`` x ``class_num``
    identity matrix, so the result has one extra trailing axis of length
    ``class_num``.
    """
    identity = np.eye(class_num, dtype=np.float32)
    return identity[y,]
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    Args:
        pc: 2-D array with one point per row (callers pass the xyz columns).

    Returns:
        A new array of the same shape; the input is not modified. If every
        point coincides with the centroid the centered (all-zero) cloud is
        returned as is, instead of dividing by zero.
    """
    centered = pc - np.mean(pc, axis=0)
    # Largest distance of any point from the centroid.
    radius = np.max(np.sqrt(np.sum(centered ** 2, axis=1)))
    if radius == 0:
        return centered
    return centered / radius
if __name__ == '__main__':
    # Load the whitespace-separated cloud and normalize its xyz columns.
    raw = np.loadtxt('85a15c26a6e9921ae008cc4902bfe3cd.txt').astype(np.float32)
    xyz = pc_normalize(raw[:, 0:3])

    # Draw point_num rows with replacement so the input always has the size
    # the network expects, whatever the size of the file.
    picked = np.random.choice(xyz.shape[0], point_num, replace=True)
    pts = xyz[picked, :]
    points = pts.reshape(1, point_num, 3).swapaxes(2, 1)

    # Object category 0; it is one-hot encoded before being fed to the model.
    label = np.array([[0]], dtype=np.int32)

    session = onnxruntime.InferenceSession(
        "part_seg.onnx",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    input_names = [node.name for node in session.get_inputs()]
    # Collected for reference only; run(None, ...) below requests every output.
    output_names = [node.name for node in session.get_outputs()]

    feed = {
        input_names[0]: points,
        input_names[1]: to_categorical(label, class_num),
    }
    # First output, first (only) cloud: one row of part scores per point.
    logits = session.run(None, feed)[0][0, :, :]
    part_ids = np.argmax(logits, 1).astype(np.int32)

    # Save "x y z part" rows for the sampled points.
    labelled = np.append(pts.reshape(point_num, 3), part_ids.reshape(point_num, 1), 1)
    np.savetxt('pred.txt', labelled, fmt='%.06f')
C++推理:
#include <iostream>
#include <vector>
#include <fstream>
#include <ctime>
#include <onnxruntime_cxx_api.h>
// Number of points sampled from the input cloud and fed to the network.
const int point_num = 2048;
// Length of the one-hot object-category vector passed as the second input.
const int class_num = 16;
// Number of part scores the output holds for each point.
const int parts_num = 50;
// Centers an interleaved xyz point cloud on its centroid and scales it so the
// farthest point lies on the unit sphere. Operates in place.
//
// points: x0, y0, z0, x1, y1, z1, ... ; every complete xyz triple in the
//         vector is processed, however many points the file contained. This
//         matters because main() normalizes the whole cloud before it is
//         resampled to point_num points. An empty vector is left untouched,
//         and a cloud whose points all coincide is only centered (no division
//         by zero).
void pc_normalize(std::vector<float>& points)
{
    const size_t count = points.size() / 3;
    if (count == 0)
        return;

    float mean_x = 0, mean_y = 0, mean_z = 0;
    for (size_t i = 0; i < count; ++i)
    {
        mean_x += points[3 * i];
        mean_y += points[3 * i + 1];
        mean_z += points[3 * i + 2];
    }
    mean_x /= count;
    mean_y /= count;
    mean_z /= count;

    // Center the cloud and track the largest squared distance from the
    // centroid; the square root is taken once, after the loop.
    double max_sq = 0;
    for (size_t i = 0; i < count; ++i)
    {
        points[3 * i] -= mean_x;
        points[3 * i + 1] -= mean_y;
        points[3 * i + 2] -= mean_z;

        const double px = points[3 * i];
        const double py = points[3 * i + 1];
        const double pz = points[3 * i + 2];
        const double sq = px * px + py * py + pz * pz;
        if (sq > max_sq)
            max_sq = sq;
    }

    if (max_sq == 0)
        return;

    const float m = static_cast<float>(sqrt(max_sq));
    for (size_t i = 0; i < 3 * count; ++i)
    {
        points[i] /= m;
    }
}
// Replaces `points` (interleaved xyz) with point_num points drawn from it at
// random, with replacement. The generator is seeded from the current time.
void resample(std::vector<float>& points)
{
    srand((int)time(0));

    std::vector<float> sampled;
    sampled.reserve(3 * point_num);
    for (int k = 0; k < point_num; ++k)
    {
        // Pick a source point and copy its three coordinates.
        const int picked = rand() % (points.size() / 3);
        sampled.push_back(points[3 * picked]);
        sampled.push_back(points[3 * picked + 1]);
        sampled.push_back(points[3 * picked + 2]);
    }
    points.swap(sampled);
}
std::vector<int> classfier(std::vector<float> & points, std::vector<float> & labels)
{
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "part_seg");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"part_seg.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
std::vector<const char*> input_node_names;
for (size_t i = 0; i < session.GetInputCount(); i++)
{
input_node_names.push_back(session.GetInputName(i, allocator));
}
std::vector<const char*> output_node_names;
for (size_t i = 0; i < session.GetOutputCount(); i++)
{
output_node_names.push_back(session.GetOutputName(i, allocator));
}
const size_t input_tensor_size0 = 1 * 3 * point_num;
std::vector<float> input_tensor_values0(input_tensor_size0);
for (size_t i = 0; i < 3; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values0[point_num * i + j] = points[3 * j + i];
}
}
std::vector<int64_t> input_node_dims0 = { 1, 3, point_num };
auto memory_info0 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor0 = Ort::Value::CreateTensor<float>(memory_info0, input_tensor_values0.data(), input_tensor_size0, input_node_dims0.data(), input_node_dims0.size());
const size_t input_tensor_size1 = 1 * 1 * class_num;
std::vector<float> input_tensor_values1(input_tensor_size0);
for (size_t i = 0; i < class_num; i++)
{
input_tensor_values1[i] = labels[i];
}
std::vector<int64_t> input_node_dims1 = { 1, 1, class_num };
auto memory_info1 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor1 = Ort::Value::CreateTensor<float>(memory_info1, input_tensor_values1.data(), input_tensor_size1, input_node_dims1.data(), input_node_dims1.size());
std::vector<Ort::Value> inputs;
inputs.push_back(std::move(input_tensor0));
inputs.push_back(std::move(input_tensor1));
std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = outputs[0].GetTensorData<float>();
std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> pred(rawOutput, rawOutput + count);
std::vector<std::vector<float>> preds(point_num, std::vector<float>(parts_num, 0));
for (size_t i = 0; i < point_num; i++)
{
for (size_t j = 0; j < parts_num; j++)
{
preds[i][j] = pred[i * parts_num + j];
}
}
std::vector<int> max_index(point_num, 0);
for (size_t i = 0; i < point_num; i++)
{
max_index[i]= std::max_element(preds[i].begin(), preds[i].end()) - preds[i].begin();
}
return max_index;
}
// Reads a whitespace-separated "x y z nx ny nz label" cloud, normalizes and
// resamples it, runs part segmentation for object category 0 and writes
// "x y z part" rows to pred.txt.
// Returns 1 if the input cannot be opened or contains no points, or if the
// output cannot be created; 0 otherwise.
int main()
{
    std::ifstream infile("85a15c26a6e9921ae008cc4902bfe3cd.txt");
    if (!infile)
    {
        std::cerr << "cannot open 85a15c26a6e9921ae008cc4902bfe3cd.txt" << std::endl;
        return 1;
    }

    std::vector<float> points;
    float x, y, z, nx, ny, nz, label;
    // Only the coordinates are kept; normals and the label are parsed and dropped.
    while (infile >> x >> y >> z >> nx >> ny >> nz >> label)
    {
        points.push_back(x);
        points.push_back(y);
        points.push_back(z);
    }
    infile.close();

    if (points.empty())
    {
        std::cerr << "85a15c26a6e9921ae008cc4902bfe3cd.txt: no points read" << std::endl;
        return 1;
    }

    // One-hot vector selecting object category 0.
    std::vector<float> labels(class_num, 0.0f);
    labels[0] = 1.0f;

    pc_normalize(points);
    resample(points);

    std::vector<int> result = classfier(points, labels);

    // 'w' is a character, not a stream open mode; std::ofstream with its
    // default mode creates or truncates the file for writing.
    std::ofstream outfile("pred.txt");
    if (!outfile)
    {
        std::cerr << "cannot create pred.txt" << std::endl;
        return 1;
    }
    for (size_t i = 0; i < point_num; i++)
    {
        outfile << points[3 * i] << " " << points[3 * i + 1] << " " << points[3 * i + 2] << " " << result[i]<< std::endl;
    }
    outfile.close();
    return 0;
}
semantic segmentation
以分13类,gpu版本为例。
先将pytorch训练出的pth权重文件转为onnx文件:
import torch
import pointnet_sem_seg
# Export the trained semantic-segmentation model (sem_seg.pth) to ONNX.
point_num = 4096
class_num = 13

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script runs unchanged on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = pointnet_sem_seg.get_model(class_num)
# map_location lets a checkpoint that was saved on a GPU load on any device.
checkpoint = torch.load('sem_seg.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Dummy input used only for tracing: one block of point_num points with
# 9 channels each.
x = torch.rand(1, 9, point_num, device=device)

export_onnx_file = "./sem_seg.onnx"
torch.onnx.export(model, x, export_onnx_file, opset_version=11)
python推理代码:
import numpy as np
import onnxruntime
point_num = 4096  # points per block fed to the network
class_num = 13  # number of semantic labels being voted over
stride = 0.5  # step between consecutive block origins along x and y
block_size = 1.0  # side length of each square x-y block
if __name__ == '__main__':
    # The first six columns are used as x, y, z, r, g, b.
    data = np.load('Area_1_conferenceRoom_1.npy')
    points = data[:, :6]
    coord_min, coord_max = np.amin(points, axis=0)[:3], np.amax(points, axis=0)[:3]

    # Number of sliding-window positions along x and y. Clamped to at least
    # one so a room smaller than a single block is still processed.
    grid_x = max(1, int(np.ceil(float(coord_max[0] - coord_min[0] - block_size) / stride) + 1))
    grid_y = max(1, int(np.ceil(float(coord_max[1] - coord_min[1] - block_size) / stride) + 1))

    # Per-block results are collected in lists and concatenated once, instead
    # of re-stacking the growing arrays on every iteration.
    block_data, block_indices = [], []
    for index_y in range(grid_y):
        for index_x in range(grid_x):
            # Block bounds; the last block on each axis is shifted back so it
            # ends exactly at the room boundary.
            s_x = coord_min[0] + index_x * stride
            e_x = min(s_x + block_size, coord_max[0])
            s_x = e_x - block_size
            s_y = coord_min[1] + index_y * stride
            e_y = min(s_y + block_size, coord_max[1])
            s_y = e_y - block_size
            point_idxs = np.where((points[:, 0] >= s_x) & (points[:, 0] <= e_x)
                                  & (points[:, 1] >= s_y) & (points[:, 1] <= e_y))[0]
            if point_idxs.size == 0:
                continue

            # Pad the block with re-drawn points up to a multiple of point_num;
            # draw with replacement only when more padding than points is needed.
            num_batch = int(np.ceil(point_idxs.size / point_num))
            point_size = int(num_batch * point_num)
            num_extra = point_size - point_idxs.size
            replace = num_extra > point_idxs.size
            point_idxs_repeat = np.random.choice(point_idxs, num_extra, replace=replace)
            point_idxs = np.concatenate((point_idxs, point_idxs_repeat))
            np.random.shuffle(point_idxs)

            # Fancy indexing copies, so `points` itself is never modified.
            data_batch = points[point_idxs, :]
            # Room-normalized coordinates, taken before xy are re-centered.
            normlized_xyz = data_batch[:, 0:3] / coord_max
            data_batch[:, 0] -= s_x + block_size / 2.0
            data_batch[:, 1] -= s_y + block_size / 2.0
            data_batch[:, 3:6] /= 255.0

            block_data.append(np.concatenate((data_batch, normlized_xyz), axis=1))
            block_indices.append(point_idxs)

    data_room = np.concatenate(block_data, axis=0)
    data_room = data_room.reshape((-1, point_num, data_room.shape[1]))
    index_room = np.concatenate(block_indices).reshape((-1, point_num))

    onnx_session = onnxruntime.InferenceSession(
        "sem_seg.onnx",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    input_name = [node.name for node in onnx_session.get_inputs()]

    # vote_label_pool[p, c] counts how often original point p was labelled c.
    vote_label_pool = np.zeros((points.shape[0], class_num))
    for block, block_point_idx in zip(data_room, index_room):
        # One block per run, laid out as (1, 9, point_num).
        batch = block[np.newaxis, ...].swapaxes(2, 1).astype(np.float32)
        inputs = {name: batch for name in input_name}
        outputs = onnx_session.run(None, inputs)[0]
        pred_label = np.argmax(outputs, 2)[0]
        # np.add.at accumulates correctly when a point occurs more than once
        # in the same block.
        np.add.at(vote_label_pool, (block_point_idx, pred_label), 1)

    pred = np.argmax(vote_label_pool, 1)
    with open('pred.txt', 'w') as fout:
        for (px, py, pz), label in zip(points[:, :3], pred):
            fout.write('%f %f %f %d\n' % (px, py, pz, label))
C++推理:
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <ctime>
#include <random>
#include <onnxruntime_cxx_api.h>
// Number of points per block fed to the network (the tensor is 1 x 9 x point_num).
const int point_num = 4096;
// Number of semantic labels scored for each point.
const int class_num = 13;
// One point of the cloud: position, colour, and three extra channels that the
// caller fills in (main() stores the coordinates divided by the room maximum
// there). Every field defaults to zero.
struct point
{
    float m_x = 0, m_y = 0, m_z = 0;
    float m_r = 0, m_g = 0, m_b = 0;
    float m_normal_x = 0, m_normal_y = 0, m_normal_z = 0;

    point() = default;

    // Position and colour only; the extra channels stay zero.
    point(float x, float y, float z, float r, float g, float b) :
        point(x, y, z, r, g, b, 0, 0, 0) {}

    point(float x, float y, float z, float r, float g, float b, float normal_x, float normal_y, float normal_z) :
        m_x(x), m_y(y), m_z(z),
        m_r(r), m_g(g), m_b(b),
        m_normal_x(normal_x), m_normal_y(normal_y), m_normal_z(normal_z) {}
};
int main()
{
float x, y, z, r, g, b, l;
std::vector<point> pts;
std::vector<float> points_x, points_y, points_z;
int points_num = 0;
std::ifstream infile("Area_1_conferenceRoom_1.txt");
while (infile >> x >> y >> z >> r >> g >> b >> l)
{
point pt(x, y, z, r, g, b);
pts.push_back(pt);
points_x.push_back(x);
points_y.push_back(y);
points_z.push_back(z);
points_num++;
}
float x_min = *std::min_element(points_x.begin(), points_x.end());
float y_min = *std::min_element(points_y.begin(), points_y.end());
float z_min = *std::min_element(points_z.begin(), points_z.end());
float x_max = *std::max_element(points_x.begin(), points_x.end());
float y_max = *std::max_element(points_y.begin(), points_y.end());
float z_max = *std::max_element(points_z.begin(), points_z.end());
float stride = 0.5;
float block_size = 1.0;
srand((int)time(0));
int grid_x = ceil((x_max - x_min - block_size) / stride) + 1;
int grid_y = ceil((y_max - y_min - block_size) / stride) + 1;
std::vector<point> data_room;
std::vector<int> index_room;
for (size_t index_y = 0; index_y < grid_y; index_y++)
{
for (size_t index_x = 0; index_x < grid_x; index_x++)
{
float s_x = x_min + index_x * stride;
float e_x = std::min(s_x + block_size, x_max);
s_x = e_x - block_size;
float s_y = y_min + index_y * stride;
float e_y = std::min(s_y + block_size, y_max);
s_y = e_y - block_size;
std::vector<int> point_idxs;
for (size_t i = 0; i < points_num; i++)
{
if (points_x[i] >= s_x && points_x[i] <= e_x && points_y[i] >= s_y && points_y[i] <= e_y)
point_idxs.push_back(i);
}
if (point_idxs.size() == 0)
continue;
int num_batch = ceil(point_idxs.size() * 1.0 / point_num);
int point_size = num_batch * point_num;
bool replace = (point_size - point_idxs.size() <= point_idxs.size() ? false : true);
std::vector<int> point_idxs_repeat;
if (replace)
{
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
point_idxs_repeat.push_back(point_idxs[id]);
}
}
else
{
std::vector<bool> flags(pts.size(), false);
for (size_t i = 0; i < point_size - point_idxs.size(); i++)
{
int id = rand() % point_idxs.size();
while (true)
{
if (flags[id] == false)
{
flags[id] = true;
break;
}
id = rand() % point_idxs.size();
}
point_idxs_repeat.push_back(point_idxs[id]);
}
}
point_idxs.insert(point_idxs.end(), point_idxs_repeat.begin(), point_idxs_repeat.end());
std::random_device rd;
std::mt19937 g(rd()); // 随机数引擎:基于梅森缠绕器算法的随机数生成器
std::shuffle(point_idxs.begin(), point_idxs.end(), g); // 打乱顺序,重新排序(随机序列)
std::vector<point> data_batch;
for (size_t i = 0; i < point_idxs.size(); i++)
{
data_batch.push_back(pts[point_idxs[i]]);
}
for (size_t i = 0; i < point_size; i++)
{
data_batch[i].m_normal_x = data_batch[i].m_x / x_max;
data_batch[i].m_normal_y = data_batch[i].m_y / y_max;
data_batch[i].m_normal_z = data_batch[i].m_z / z_max;
data_batch[i].m_x -= (s_x + block_size / 2.0);
data_batch[i].m_y -= (s_y + block_size / 2.0);
data_batch[i].m_r /= 255.0;
data_batch[i].m_g /= 255.0;
data_batch[i].m_b /= 255.0;
data_room.push_back(data_batch[i]);
index_room.push_back(point_idxs[i]);
}
}
}
int n = point_num, m = index_room.size() / n;
std::vector<std::vector<point>> data_rooms(m, std::vector<point>(n, point()));
std::vector<std::vector<int>> index_rooms(m, std::vector<int>(n, 0));
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
data_rooms[i][j] = data_room[i * n + j];
index_rooms[i][j] = index_room[i * n + j];
}
}
std::vector<std::vector<int>> vote_label_pool(points_num, std::vector<int>(class_num, 0));
int num_blocks = data_rooms.size();
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "sem_seg");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"sem_seg.onnx";
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
std::vector<const char*> input_node_names;
for (size_t i = 0; i < session.GetInputCount(); i++)
{
input_node_names.push_back(session.GetInputName(i, allocator));
}
std::vector<const char*> output_node_names;
for (size_t i = 0; i < session.GetOutputCount(); i++)
{
output_node_names.push_back(session.GetOutputName(i, allocator));
}
const size_t input_tensor_size = 1 * 9 * point_num;
std::vector<float> input_tensor_values(input_tensor_size);
for (int sbatch = 0; sbatch < num_blocks; sbatch++)
{
int start_idx = sbatch;
int end_idx = std::min(sbatch + 1, num_blocks);
int real_batch_size = end_idx - start_idx;
std::vector<point> batch_data = data_rooms[start_idx];
std::vector<int> point_idx = index_rooms[start_idx];
std::vector<float> batch(point_num * 9);
for (size_t i = 0; i < point_num; i++)
{
batch[9 * i + 0] = batch_data[i].m_x;
batch[9 * i + 1] = batch_data[i].m_y;
batch[9 * i + 2] = batch_data[i].m_z;
batch[9 * i + 3] = batch_data[i].m_r;
batch[9 * i + 4] = batch_data[i].m_g;
batch[9 * i + 5] = batch_data[i].m_b;
batch[9 * i + 6] = batch_data[i].m_normal_x;
batch[9 * i + 7] = batch_data[i].m_normal_y;
batch[9 * i + 8] = batch_data[i].m_normal_z;
}
for (size_t i = 0; i < 9; i++)
{
for (size_t j = 0; j < point_num; j++)
{
input_tensor_values[i * point_num + j] = batch[9 * j + i];
}
}
std::vector<int64_t> input_node_dims = { 1, 9, point_num };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> inputs;
inputs.push_back(std::move(input_tensor));
std::vector<Ort::Value> outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
const float* rawOutput = outputs[0].GetTensorData<float>();
std::vector<int64_t> outputShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = outputs[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> pred(rawOutput, rawOutput + count);
std::vector<std::vector<float>> preds(point_num, std::vector<float>(class_num, 0));
for (size_t i = 0; i < point_num; i++)
{
for (size_t j = 0; j < class_num; j++)
{
preds[i][j] = pred[i * class_num + j];
}
}
std::vector<int> pred_label(point_num, 0);
for (size_t i = 0; i < point_num; i++)
{
pred_label[i] = std::max_element(preds[i].begin(), preds[i].end()) - preds[i].begin();
vote_label_pool[point_idx[i]][pred_label[i]] += 1;
}
}
std::ofstream outfile("pred.txt");
for (size_t i = 0; i < points_num; i++)
{
int max_index = std::max_element(vote_label_pool[i].begin(), vote_label_pool[i].end()) - vote_label_pool[i].begin();
outfile << pts[i].m_x << " " << pts[i].m_y << " " << pts[i].m_z << " " << max_index << std::endl;
}
outfile.close();
return 0;
}
注意,由于C++无法直接读取npy格式文件(可以依赖一些库),这里先使用python脚本将npy文件转换成txt文件。
import numpy as np
# Dump the room array as text with six decimals per value, so the C++ program
# can read it with plain stream extraction.
np.savetxt('Area_1_conferenceRoom_1.txt',
           np.load("Area_1_conferenceRoom_1.npy"),
           fmt='%0.06f')
模型的下载地址:pointnet模型权重
GitHub 加速计划 / on / onnxruntime
13.76 K
2.79 K
下载
microsoft/onnxruntime: 是一个用于运行各种机器学习模型的开源库。适合对机器学习和深度学习有兴趣的人,特别是在开发和部署机器学习模型时需要处理各种不同框架和算子的人。特点是支持多种机器学习框架和算子,包括 TensorFlow、PyTorch、Caffe 等,具有高性能和广泛的兼容性。
最近提交(Master分支:1 个月前 )
1bda91fc
### Description
Fixes the problem of running into failure when GPU inputs shuffled
between iterations. 9 天前
52a8c1ca
### Description
Enables using the MLTensor to pass data between models.
### Motivation and Context
Using MLTensor instead of ArrayBuffers reduces the number of copies
between the CPU and devices as well as the renderer and GPU process in
Chromium. 10 天前
更多推荐
已为社区贡献11条内容
所有评论(0)